vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +85 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +13 -24
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +39 -40
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +19 -39
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +95 -62
- vectordb_bench/backend/clients/test/cli.py +2 -3
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +5 -9
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +18 -14
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +56 -23
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +46 -22
- vectordb_bench/backend/runner/serial_runner.py +81 -46
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -92
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +45 -24
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.21.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
vectordb_bench/__init__.py
CHANGED

@@ -22,46 +22,71 @@ class config:
     DROP_OLD = env.bool("DROP_OLD", True)
     USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)

-    NUM_CONCURRENCY = env.list(
+    NUM_CONCURRENCY = env.list(
+        "NUM_CONCURRENCY",
+        [
+            1,
+            5,
+            10,
+            15,
+            20,
+            25,
+            30,
+            35,
+            40,
+            45,
+            50,
+            55,
+            60,
+            65,
+            70,
+            75,
+            80,
+            85,
+            90,
+            95,
+            100,
+        ],
+        subcast=int,
+    )

     CONCURRENCY_DURATION = 30

     RESULTS_LOCAL_DIR = env.path(
-        "RESULTS_LOCAL_DIR",
+        "RESULTS_LOCAL_DIR",
+        pathlib.Path(__file__).parent.joinpath("results"),
     )
     CONFIG_LOCAL_DIR = env.path(
-        "CONFIG_LOCAL_DIR",
+        "CONFIG_LOCAL_DIR",
+        pathlib.Path(__file__).parent.joinpath("config-files"),
     )

-
     K_DEFAULT = 100  # default return top k nearest neighbors during search
     CUSTOM_CONFIG_DIR = pathlib.Path(__file__).parent.joinpath("custom/custom_case.json")

-    CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600
-    LOAD_TIMEOUT_DEFAULT
-    LOAD_TIMEOUT_768D_1M
-    LOAD_TIMEOUT_768D_10M
-    LOAD_TIMEOUT_768D_100M
+    CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600  # 24h
+    LOAD_TIMEOUT_DEFAULT = 24 * 3600  # 24h
+    LOAD_TIMEOUT_768D_1M = 24 * 3600  # 24h
+    LOAD_TIMEOUT_768D_10M = 240 * 3600  # 10d
+    LOAD_TIMEOUT_768D_100M = 2400 * 3600  # 100d

-    LOAD_TIMEOUT_1536D_500K
-    LOAD_TIMEOUT_1536D_5M
+    LOAD_TIMEOUT_1536D_500K = 24 * 3600  # 24h
+    LOAD_TIMEOUT_1536D_5M = 240 * 3600  # 10d

-    OPTIMIZE_TIMEOUT_DEFAULT
-    OPTIMIZE_TIMEOUT_768D_1M
-    OPTIMIZE_TIMEOUT_768D_10M
-    OPTIMIZE_TIMEOUT_768D_100M
+    OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600  # 24h
+    OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600  # 24h
+    OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600  # 10d
+    OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600  # 100d

+    OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600  # 24h
+    OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600  # 10d

-    OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600  # 24h
-    OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600  # 10d
-
     def display(self) -> str:
-
-        i
-
-        and not i[0].startswith(
-        and "TIMEOUT" not in i[0]
+        return [
+            i
+            for i in inspect.getmembers(self)
+            if not inspect.ismethod(i[1]) and not i[0].startswith("_") and "TIMEOUT" not in i[0]
         ]
-
+

 log_util.init(config.LOG_LEVEL)
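The `env.bool` / `env.list` / `env.path` accessors above follow the environs-style API, so the concurrency ladder and the local result/config directories can be overridden through environment variables. A minimal sketch, assuming the `environs` package provides the `env` object used in this module (the import is outside this hunk):

```python
# Minimal sketch, assuming the environs package backs the `env` object above.
import os
from environs import Env

# A comma-separated NUM_CONCURRENCY overrides the default concurrency ladder.
os.environ["NUM_CONCURRENCY"] = "1,10,100"

env = Env()
concurrency = env.list("NUM_CONCURRENCY", [1, 5, 10], subcast=int)
print(concurrency)  # [1, 10, 100]
```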
vectordb_bench/__main__.py
CHANGED

@@ -1,7 +1,8 @@
-import traceback
 import logging
+import pathlib
 import subprocess
-import
+import traceback
+
 from . import config

 log = logging.getLogger("vectordb_bench")
@@ -16,7 +17,7 @@ def run_streamlit():
     cmd = [
         "streamlit",
         "run",
-        f"{
+        f"{pathlib.Path(__file__).parent}/frontend/vdb_benchmark.py",
         "--logger.level",
         "info",
         "--theme.base",
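The new f-string resolves the Streamlit dashboard script relative to the installed package with `pathlib`. A standalone sketch of the same pattern, for illustration only:

```python
import pathlib
import subprocess

# Same pattern as run_streamlit(): resolve the dashboard script relative to this module,
# then hand it to the `streamlit run` CLI.
script = f"{pathlib.Path(__file__).parent}/frontend/vdb_benchmark.py"
cmd = ["streamlit", "run", script, "--logger.level", "info"]
# subprocess.run(cmd, check=True)  # uncomment to actually launch the dashboard
```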
vectordb_bench/backend/assembler.py
CHANGED

@@ -1,24 +1,25 @@
-from .cases import CaseLabel
-from .task_runner import CaseRunner, RunningStatus, TaskRunner
-from ..models import TaskConfig
-from ..backend.clients import EmptyDBCaseConfig
-from ..backend.data_source import DatasetSource
 import logging

+from vectordb_bench.backend.clients import EmptyDBCaseConfig
+from vectordb_bench.backend.data_source import DatasetSource
+from vectordb_bench.models import TaskConfig
+
+from .cases import CaseLabel
+from .task_runner import CaseRunner, RunningStatus, TaskRunner

 log = logging.getLogger(__name__)


 class Assembler:
     @classmethod
-    def assemble(cls, run_id , task: TaskConfig, source: DatasetSource) -> CaseRunner:
+    def assemble(cls, run_id: str, task: TaskConfig, source: DatasetSource) -> CaseRunner:
         c_cls = task.case_config.case_id.case_cls

         c = c_cls(task.case_config.custom_case)
-        if type(task.db_case_config)
+        if type(task.db_case_config) is not EmptyDBCaseConfig:
             task.db_case_config.metric_type = c.dataset.data.metric_type

-
+        return CaseRunner(
             run_id=run_id,
             config=task,
             ca=c,
@@ -26,8 +27,6 @@ class Assembler:
             dataset_source=source,
         )

-        return runner
-
     @classmethod
     def assemble_all(
         cls,
@@ -50,12 +49,12 @@ class Assembler:
             db2runner[db].append(r)

         # check dbclient installed
-        for k in db2runner
+        for k in db2runner:
             _ = k.init_cls

         # sort by dataset size
-        for
-
+        for _, runner in db2runner.items():
+            runner.sort(key=lambda x: x.ca.dataset.data.size)

         all_runners = []
         all_runners.extend(load_runners)
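The reworked `assemble_all` groups runners per database and then sorts each group in place by dataset size. A small self-contained sketch of that idiom, using underscore-prefixed stand-in classes (the real `CaseRunner` and dataset types live in `task_runner.py` and `dataset.py`):

```python
from dataclasses import dataclass

# Illustrative stand-ins only; they mimic the attribute chain r.ca.dataset.data.size.
@dataclass
class _Data:
    size: int

@dataclass
class _Dataset:
    data: _Data

@dataclass
class _Case:
    dataset: _Dataset

@dataclass
class _Runner:
    ca: _Case

db2runner = {
    "some_db": [_Runner(_Case(_Dataset(_Data(10_000_000)))),
                _Runner(_Case(_Dataset(_Data(1_000_000))))],
}

# Same idiom as assemble_all: sort each DB's runners by dataset size, smallest first.
for _, runners in db2runner.items():
    runners.sort(key=lambda r: r.ca.dataset.data.size)

print([r.ca.dataset.data.size for r in db2runner["some_db"]])  # [1000000, 10000000]
```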
vectordb_bench/backend/cases.py
CHANGED

@@ -1,7 +1,5 @@
-import typing
 import logging
 from enum import Enum, auto
-from typing import Type

 from vectordb_bench import config
 from vectordb_bench.backend.clients.api import MetricType
@@ -12,7 +10,6 @@ from vectordb_bench.frontend.components.custom.getCustomConfig import (

 from .dataset import CustomDataset, Dataset, DatasetManager

-
 log = logging.getLogger(__name__)


@@ -50,11 +47,10 @@ class CaseType(Enum):
     Custom = 100
     PerformanceCustomDataset = 101

-    def case_cls(self, custom_configs: dict | None = None) ->
+    def case_cls(self, custom_configs: dict | None = None) -> type["Case"]:
         if custom_configs is None:
             return type2case.get(self)()
-
-        return type2case.get(self)(**custom_configs)
+        return type2case.get(self)(**custom_configs)

     def case_name(self, custom_configs: dict | None = None) -> str:
         c = self.case_cls(custom_configs)
@@ -99,10 +95,10 @@ class Case(BaseModel):
     @property
     def filters(self) -> dict | None:
         if self.filter_rate is not None:
-
+            target_id = round(self.filter_rate * self.dataset.data.size)
             return {
-                "metadata": f">={
-                "id":
+                "metadata": f">={target_id}",
+                "id": target_id,
             }

         return None
@@ -126,8 +122,8 @@ class CapacityDim960(CapacityCase):
     case_id: CaseType = CaseType.CapacityDim960
     dataset: DatasetManager = Dataset.GIST.manager(100_000)
     name: str = "Capacity Test (960 Dim Repeated)"
-    description: str = """This case tests the vector database's loading capacity by repeatedly inserting large-dimension
-    vectors (GIST 100K vectors, <b>960 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
+    description: str = """This case tests the vector database's loading capacity by repeatedly inserting large-dimension
+    vectors (GIST 100K vectors, <b>960 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
     reported."""


@@ -136,7 +132,7 @@ class CapacityDim128(CapacityCase):
     dataset: DatasetManager = Dataset.SIFT.manager(500_000)
     name: str = "Capacity Test (128 Dim Repeated)"
     description: str = """This case tests the vector database's loading capacity by repeatedly inserting small-dimension
-    vectors (SIFT 100K vectors, <b>128 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
+    vectors (SIFT 100K vectors, <b>128 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
     reported."""


@@ -144,8 +140,9 @@ class Performance768D10M(PerformanceCase):
     case_id: CaseType = CaseType.Performance768D10M
     dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
     name: str = "Search Performance Test (10M Dataset, 768 Dim)"
-    description: str = """This case tests the search performance of a vector database with a large dataset
-
+    description: str = """This case tests the search performance of a vector database with a large dataset
+    (<b>Cohere 10M vectors</b>, 768 dimensions) at varying parallel levels.
+    Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_10M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_10M

@@ -154,8 +151,9 @@ class Performance768D1M(PerformanceCase):
     case_id: CaseType = CaseType.Performance768D1M
     dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
     name: str = "Search Performance Test (1M Dataset, 768 Dim)"
-    description: str = """This case tests the search performance of a vector database with a medium dataset
-
+    description: str = """This case tests the search performance of a vector database with a medium dataset
+    (<b>Cohere 1M vectors</b>, 768 dimensions) at varying parallel levels.
+    Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_1M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_1M

@@ -165,8 +163,9 @@ class Performance768D10M1P(PerformanceCase):
     filter_rate: float | int | None = 0.01
     dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
     name: str = "Filtering Search Performance Test (10M Dataset, 768 Dim, Filter 1%)"
-    description: str = """This case tests the search performance of a vector database with a large dataset
-
+    description: str = """This case tests the search performance of a vector database with a large dataset
+    (<b>Cohere 10M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel
+    levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_10M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_10M

@@ -176,8 +175,9 @@ class Performance768D1M1P(PerformanceCase):
     filter_rate: float | int | None = 0.01
     dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
     name: str = "Filtering Search Performance Test (1M Dataset, 768 Dim, Filter 1%)"
-    description: str = """This case tests the search performance of a vector database with a medium dataset
-
+    description: str = """This case tests the search performance of a vector database with a medium dataset
+    (<b>Cohere 1M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_1M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_1M

@@ -187,8 +187,9 @@ class Performance768D10M99P(PerformanceCase):
     filter_rate: float | int | None = 0.99
     dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
     name: str = "Filtering Search Performance Test (10M Dataset, 768 Dim, Filter 99%)"
-    description: str = """This case tests the search performance of a vector database with a large dataset
-
+    description: str = """This case tests the search performance of a vector database with a large dataset
+    (<b>Cohere 10M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_10M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_10M

@@ -198,8 +199,9 @@ class Performance768D1M99P(PerformanceCase):
     filter_rate: float | int | None = 0.99
     dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
     name: str = "Filtering Search Performance Test (1M Dataset, 768 Dim, Filter 99%)"
-    description: str = """This case tests the search performance of a vector database with a medium dataset
-
+    description: str = """This case tests the search performance of a vector database with a medium dataset
+    (<b>Cohere 1M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_1M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_1M

@@ -209,8 +211,9 @@ class Performance768D100M(PerformanceCase):
     filter_rate: float | int | None = None
     dataset: DatasetManager = Dataset.LAION.manager(100_000_000)
     name: str = "Search Performance Test (100M Dataset, 768 Dim)"
-    description: str = """This case tests the search performance of a vector database with a large 100M dataset
-    Results will show index building time,
+    description: str = """This case tests the search performance of a vector database with a large 100M dataset
+    (<b>LAION 100M vectors</b>, 768 dimensions), at varying parallel levels. Results will show index building time,
+    recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_100M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_100M

@@ -220,8 +223,9 @@ class Performance1536D500K(PerformanceCase):
     filter_rate: float | int | None = None
     dataset: DatasetManager = Dataset.OPENAI.manager(500_000)
     name: str = "Search Performance Test (500K Dataset, 1536 Dim)"
-    description: str = """This case tests the search performance of a vector database with a medium 500K dataset
-    Results will show index building time,
+    description: str = """This case tests the search performance of a vector database with a medium 500K dataset
+    (<b>OpenAI 500K vectors</b>, 1536 dimensions), at varying parallel levels. Results will show index building time,
+    recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_500K
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_500K

@@ -231,8 +235,9 @@ class Performance1536D5M(PerformanceCase):
     filter_rate: float | int | None = None
     dataset: DatasetManager = Dataset.OPENAI.manager(5_000_000)
     name: str = "Search Performance Test (5M Dataset, 1536 Dim)"
-    description: str = """This case tests the search performance of a vector database with a medium 5M dataset
-    Results will show index building time,
+    description: str = """This case tests the search performance of a vector database with a medium 5M dataset
+    (<b>OpenAI 5M vectors</b>, 1536 dimensions), at varying parallel levels. Results will show index building time,
+    recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_5M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_5M

@@ -242,8 +247,9 @@ class Performance1536D500K1P(PerformanceCase):
     filter_rate: float | int | None = 0.01
     dataset: DatasetManager = Dataset.OPENAI.manager(500_000)
     name: str = "Filtering Search Performance Test (500K Dataset, 1536 Dim, Filter 1%)"
-    description: str = """This case tests the search performance of a vector database with a large dataset
-
+    description: str = """This case tests the search performance of a vector database with a large dataset
+    (<b>OpenAI 500K vectors</b>, 1536 dimensions) under a low filtering rate (<b>1% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_500K
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_500K

@@ -253,8 +259,9 @@ class Performance1536D5M1P(PerformanceCase):
     filter_rate: float | int | None = 0.01
     dataset: DatasetManager = Dataset.OPENAI.manager(5_000_000)
     name: str = "Filtering Search Performance Test (5M Dataset, 1536 Dim, Filter 1%)"
-    description: str = """This case tests the search performance of a vector database with a large dataset
-
+    description: str = """This case tests the search performance of a vector database with a large dataset
+    (<b>OpenAI 5M vectors</b>, 1536 dimensions) under a low filtering rate (<b>1% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_5M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_5M

@@ -264,8 +271,9 @@ class Performance1536D500K99P(PerformanceCase):
     filter_rate: float | int | None = 0.99
     dataset: DatasetManager = Dataset.OPENAI.manager(500_000)
     name: str = "Filtering Search Performance Test (500K Dataset, 1536 Dim, Filter 99%)"
-    description: str = """This case tests the search performance of a vector database with a medium dataset
-
+    description: str = """This case tests the search performance of a vector database with a medium dataset
+    (<b>OpenAI 500K vectors</b>, 1536 dimensions) under a high filtering rate (<b>99% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_500K
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_500K

@@ -275,8 +283,9 @@ class Performance1536D5M99P(PerformanceCase):
     filter_rate: float | int | None = 0.99
     dataset: DatasetManager = Dataset.OPENAI.manager(5_000_000)
     name: str = "Filtering Search Performance Test (5M Dataset, 1536 Dim, Filter 99%)"
-    description: str = """This case tests the search performance of a vector database with a medium dataset
-
+    description: str = """This case tests the search performance of a vector database with a medium dataset
+    (<b>OpenAI 5M vectors</b>, 1536 dimensions) under a high filtering rate (<b>99% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_5M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_5M

@@ -286,8 +295,9 @@ class Performance1536D50K(PerformanceCase):
     filter_rate: float | int | None = None
     dataset: DatasetManager = Dataset.OPENAI.manager(50_000)
     name: str = "Search Performance Test (50K Dataset, 1536 Dim)"
-    description: str = """This case tests the search performance of a vector database with a medium 50K dataset
-    Results will show index building time,
+    description: str = """This case tests the search performance of a vector database with a medium 50K dataset
+    (<b>OpenAI 50K vectors</b>, 1536 dimensions), at varying parallel levels. Results will show index building time,
+    recall, and maximum QPS."""
     load_timeout: float | int = 3600
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_DEFAULT

@@ -312,11 +322,11 @@ class PerformanceCustomDataset(PerformanceCase):

     def __init__(
         self,
-        name,
-        description,
-        load_timeout,
-        optimize_timeout,
-        dataset_config,
+        name: str,
+        description: str,
+        load_timeout: float,
+        optimize_timeout: float,
+        dataset_config: dict,
         **kwargs,
     ):
         dataset_config = CustomDatasetConfig(**dataset_config)
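The new `filters` property derives the filter threshold from the case's filter rate and dataset size. A minimal sketch of that computation, written as a free function for illustration:

```python
# Minimal sketch of the `filters` computation shown in the hunk above.
def filters(filter_rate: float | None, dataset_size: int) -> dict | None:
    if filter_rate is None:
        return None
    target_id = round(filter_rate * dataset_size)
    # e.g. a 0.01 filter_rate on a 1M-vector dataset yields a ">=10000" id predicate
    return {"metadata": f">={target_id}", "id": target_id}

print(filters(0.01, 1_000_000))  # {'metadata': '>=10000', 'id': 10000}
```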