vectordb-bench 0.0.2__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/.gitignore +2 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/PKG-INFO +95 -13
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/README.md +94 -12
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/pyproject.toml +3 -0
- vectordb-bench-0.0.3/tests/test_dataset.py +36 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/tests/test_models.py +1 -1
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/tests/ut_cases.py +7 -20
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/__init__.py +14 -3
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/cases.py +34 -13
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/__init__.py +6 -1
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/api.py +12 -8
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +4 -2
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/milvus/milvus.py +17 -10
- vectordb-bench-0.0.3/vectordb_bench/backend/clients/pgvector/config.py +49 -0
- vectordb-bench-0.0.3/vectordb_bench/backend/clients/pgvector/pgvector.py +171 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/pinecone/pinecone.py +4 -3
- vectordb-bench-0.0.3/vectordb_bench/backend/clients/qdrant_cloud/config.py +33 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +11 -11
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -5
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +3 -1
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/dataset.py +99 -149
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/result_collector.py +2 -2
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/runner/mp_runner.py +29 -13
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/runner/serial_runner.py +69 -51
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/task_runner.py +43 -48
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/get_results/saveAsImage.py +4 -2
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/const/dbCaseConfigs.py +35 -4
- vectordb-bench-0.0.3/vectordb_bench/frontend/const/dbPrices.py +6 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/const/styles.py +9 -3
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/metric.py +0 -1
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/models.py +12 -8
- vectordb-bench-0.0.3/vectordb_bench/results/dbPrices.json +32 -0
- vectordb-bench-0.0.3/vectordb_bench/results/getLeaderboardData.py +52 -0
- vectordb-bench-0.0.3/vectordb_bench/results/leaderboard.json +1 -0
- vectordb-bench-0.0.2/vectordb_bench/results/result_20230609_standard.json → vectordb-bench-0.0.3/vectordb_bench/results/result_20230705_standard.json +670 -214
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/PKG-INFO +95 -13
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/SOURCES.txt +6 -1
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/requires.txt +3 -0
- vectordb-bench-0.0.2/tests/test_dataset.py +0 -53
- vectordb-bench-0.0.2/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -15
- vectordb-bench-0.0.2/vectordb_bench/frontend/const/dbPrices.py +0 -34
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/.env.example +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/.github/workflows/publish_package_on_release.yml +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/.ruff.toml +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/LICENSE +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/setup.cfg +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/tests/conftest.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/tests/pytest.ini +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/tests/test_bench_runner.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/tests/test_elasticsearch_cloud.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/tests/test_utils.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/__main__.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/__init__.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/assembler.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/milvus/config.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/runner/__init__.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/backend/utils.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/base.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/data.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/submitTask.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/pages/quries_per_dollar.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/pages/run_test.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/utils.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/frontend/vdb_benchmark.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/interface.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench/log_util.py +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/dependency_links.txt +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/entry_points.txt +0 -0
- {vectordb-bench-0.0.2 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.3
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -17,6 +17,7 @@ License-File: LICENSE
|
|
17
17
|
[](https://pypi.org/project/vectordb-bench/)
|
18
18
|
[](https://pepy.tech/project/vectordb-bench)
|
19
19
|
|
20
|
+
**Leaderboard:** https://zilliz.com/benchmark
|
20
21
|
## Quick Start
|
21
22
|
### Prerequirement
|
22
23
|
``` shell
|
@@ -41,6 +42,24 @@ Closely mimicking real-world production environments, we've set up diverse testi
|
|
41
42
|
|
42
43
|
Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
|
43
44
|
|
45
|
+
## Leaderboard
|
46
|
+
### Introduction
|
47
|
+
To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
|
48
|
+
|
49
|
+
### Scoring Rules
|
50
|
+
|
51
|
+
1. For each case, select a base value and score each system based on relative values.
|
52
|
+
- For QPS and QP$, we use the highest value as the reference, denoted as `base_QPS` or `base_QP$`, and the score of each system is `(QPS/base_QPS) * 100` or `(QP$/base_QP$) * 100`.
|
53
|
+
- For Latency, we use the lowest value as the reference, that is, `base_Latency`, and the score of each system is `(Latency + 10ms)/(base_Latency + 10ms)`.
|
54
|
+
|
55
|
+
We want to give equal weight to different cases, and not let a case with high absolute result values become the sole reason for the overall scoring. Therefore, when scoring different systems in each case, we need to use relative values.
|
56
|
+
|
57
|
+
Also, for Latency, we add 10ms to the numerator and denominator to ensure that if every system performs particularly well in a case, its advantage will not be infinitely magnified when latency tends to 0.
|
58
|
+
|
59
|
+
2. For systems that fail or timeout in a particular case, we will give them a score based on a value worse than the worst result by a factor of two. For example, in QPS or QP$, it would be half the lowest value. For Latency, it would be twice the maximum value.
|
60
|
+
|
61
|
+
3. For each system, we will take the geometric mean of its scores in all cases as its comprehensive score for a particular metric.
|
62
|
+
|
44
63
|
## Build on your own
|
45
64
|
### Install requirements
|
46
65
|
``` shell
|
@@ -69,10 +88,12 @@ $ ruff check vectordb_bench --fix
|
|
69
88
|
|
70
89
|
## How does it work?
|
71
90
|
### Result Page
|
72
|
-

|
73
92
|
This is the main page of VectorDBBench, which displays the standard benchmark results we provide. Additionally, results of all tests performed by users themselves will also be shown here. We also offer the ability to select and compare results from multiple tests simultaneously.
|
74
93
|
|
75
94
|
The standard benchmark results displayed here include all 9 cases that we currently support for all our clients (Milvus, Zilliz Cloud, Elastic Search, Qdrant Cloud, and Weaviate Cloud). However, as some systems may not be able to complete all the tests successfully due to issues like Out of Memory (OOM) or timeouts, not all clients are included in every case.
|
95
|
+
|
96
|
+
All standard benchmark results are generated by a client running on an 8 core, 32 GB host, which is located in the same region as the server being tested. The client host is equipped with an `Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz` processor. Also all the servers for the open-source systems tested in our benchmarks run on hosts with the same type of processor.
|
76
97
|
### Run Test Page
|
77
98
|

|
78
99
|
This is the page to run a test:
|
@@ -102,17 +123,17 @@ We've developed an array of 9 comprehensive benchmark cases to test vector datab
|
|
102
123
|
- **Medium Dataset, High Filtering Rate:** This case uses a medium dataset (Cohere 1M vectors, 768 dimensions) with a high filtering rate.
|
103
124
|
For a quick reference, here is a table summarizing the key aspects of each case:
|
104
125
|
|
105
|
-
Case No. | Case Type | Dataset Size
|
106
|
-
|
107
|
-
1 | Capacity Case |
|
108
|
-
2 | Capacity Case |
|
109
|
-
3 | Search Performance Case |
|
110
|
-
4 | Search Performance Case |
|
111
|
-
5 | Search Performance Case |
|
112
|
-
6 | Filtering Search Performance Case |
|
113
|
-
7 | Filtering Search Performance Case |
|
114
|
-
8 | Filtering Search Performance Case |
|
115
|
-
9 | Filtering Search Performance Case |
|
126
|
+
Case No. | Case Type | Dataset Size | Filtering Rate | Results |
|
127
|
+
|----------|-----------|--------------|----------------|---------|
|
128
|
+
1 | Capacity Case | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
|
129
|
+
2 | Capacity Case | SIFT 100K vectors, 128 dimensions | N/A | Number of inserted vectors |
|
130
|
+
3 | Search Performance Case | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
131
|
+
4 | Search Performance Case | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
132
|
+
5 | Search Performance Case | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
133
|
+
6 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
134
|
+
7 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
135
|
+
8 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
136
|
+
9 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
116
137
|
|
117
138
|
Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
|
118
139
|
|
@@ -200,3 +221,64 @@ For the Client, we welcome any parameter tuning to obtain better results.
|
|
200
221
|
Many databases may not be able to complete all test cases due to issues such as Out of Memory (OOM), crashes, or timeouts. In these scenarios, we will clearly state these occurrences in the test results.
|
201
222
|
### Mistake Or Misrepresentation
|
202
223
|
We strive for accuracy in learning and supporting various vector databases, yet there might be oversights or misapplications. For any such occurrences, feel free to [raise an issue](https://github.com/zilliztech/VectorDBBench/issues/new) or make amendments on our GitHub page.
|
224
|
+
## Timeout
|
225
|
+
In our pursuit to ensure that our benchmark reflects the reality of a production environment while guaranteeing the practicality of the system, we have implemented a timeout plan based on our experiences for various tests.
|
226
|
+
|
227
|
+
**1. Capacity Case:**
|
228
|
+
- For Capacity Case, we have assigned an overall timeout.
|
229
|
+
|
230
|
+
**2. Other Cases:**
|
231
|
+
|
232
|
+
For other cases, we have set two timeouts:
|
233
|
+
|
234
|
+
- **Data Loading Timeout:** This timeout is designed to filter out systems that are too slow in inserting data, thus ensuring that we are only considering systems that are able to cope with the demands of a real-world production environment within a reasonable time frame.
|
235
|
+
|
236
|
+
- **Optimization Preparation Timeout**: This timeout is established to avoid excessive optimization strategies that might work for benchmarks but fail to deliver in real production environments. By doing this, we ensure that the systems we consider are not only suitable for testing environments but also applicable and efficient in production scenarios.
|
237
|
+
|
238
|
+
This multi-tiered timeout approach allows our benchmark to be more representative of actual production environments and assists us in identifying systems that can truly perform in real-world scenarios.
|
239
|
+
<table>
|
240
|
+
<tr>
|
241
|
+
<th>Case</th>
|
242
|
+
<th>Data Size</th>
|
243
|
+
<th>Timeout Type</th>
|
244
|
+
<th>Value</th>
|
245
|
+
</tr>
|
246
|
+
<tr>
|
247
|
+
<td>Capacity Case</td>
|
248
|
+
<td>N/A</td>
|
249
|
+
<td>Loading timeout</td>
|
250
|
+
<td>24 hours</td>
|
251
|
+
</tr>
|
252
|
+
<tr>
|
253
|
+
<td rowspan="2">Other Cases</td>
|
254
|
+
<td rowspan="2">1M vectors, 768 dimensions</td>
|
255
|
+
<td>Loading timeout</td>
|
256
|
+
<td>2.5 hours</td>
|
257
|
+
</tr>
|
258
|
+
<tr>
|
259
|
+
<td>Optimization timeout</td>
|
260
|
+
<td>15 mins</td>
|
261
|
+
</tr>
|
262
|
+
<tr>
|
263
|
+
<td rowspan="2">Other Cases</td>
|
264
|
+
<td rowspan="2">10M vectors, 768 dimensions</td>
|
265
|
+
<td>Loading timeout</td>
|
266
|
+
<td>25 hours</td>
|
267
|
+
</tr>
|
268
|
+
<tr>
|
269
|
+
<td>Optimization timeout</td>
|
270
|
+
<td>2.5 hours</td>
|
271
|
+
</tr>
|
272
|
+
<tr>
|
273
|
+
<td rowspan="2">Other Cases</td>
|
274
|
+
<td rowspan="2">100M vectors, 768 dimensions</td>
|
275
|
+
<td>Loading timeout</td>
|
276
|
+
<td>250 hours</td>
|
277
|
+
</tr>
|
278
|
+
<tr>
|
279
|
+
<td>Optimization timeout</td>
|
280
|
+
<td>25 hours</td>
|
281
|
+
</tr>
|
282
|
+
</table>
|
283
|
+
|
284
|
+
**Note:** Some datapoints in the standard benchmark results that violate this timeout will be kept for now for reference. We will remove them in the future.
|
@@ -3,6 +3,7 @@
|
|
3
3
|
[](https://pypi.org/project/vectordb-bench/)
|
4
4
|
[](https://pepy.tech/project/vectordb-bench)
|
5
5
|
|
6
|
+
**Leaderboard:** https://zilliz.com/benchmark
|
6
7
|
## Quick Start
|
7
8
|
### Prerequirement
|
8
9
|
``` shell
|
@@ -27,6 +28,24 @@ Closely mimicking real-world production environments, we've set up diverse testi
|
|
27
28
|
|
28
29
|
Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
|
29
30
|
|
31
|
+
## Leaderboard
|
32
|
+
### Introduction
|
33
|
+
To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
|
34
|
+
|
35
|
+
### Scoring Rules
|
36
|
+
|
37
|
+
1. For each case, select a base value and score each system based on relative values.
|
38
|
+
- For QPS and QP$, we use the highest value as the reference, denoted as `base_QPS` or `base_QP$`, and the score of each system is `(QPS/base_QPS) * 100` or `(QP$/base_QP$) * 100`.
|
39
|
+
- For Latency, we use the lowest value as the reference, that is, `base_Latency`, and the score of each system is `(Latency + 10ms)/(base_Latency + 10ms)`.
|
40
|
+
|
41
|
+
We want to give equal weight to different cases, and not let a case with high absolute result values become the sole reason for the overall scoring. Therefore, when scoring different systems in each case, we need to use relative values.
|
42
|
+
|
43
|
+
Also, for Latency, we add 10ms to the numerator and denominator to ensure that if every system performs particularly well in a case, its advantage will not be infinitely magnified when latency tends to 0.
|
44
|
+
|
45
|
+
2. For systems that fail or timeout in a particular case, we will give them a score based on a value worse than the worst result by a factor of two. For example, in QPS or QP$, it would be half the lowest value. For Latency, it would be twice the maximum value.
|
46
|
+
|
47
|
+
3. For each system, we will take the geometric mean of its scores in all cases as its comprehensive score for a particular metric.
|
48
|
+
|
30
49
|
## Build on your own
|
31
50
|
### Install requirements
|
32
51
|
``` shell
|
@@ -55,10 +74,12 @@ $ ruff check vectordb_bench --fix
|
|
55
74
|
|
56
75
|
## How does it work?
|
57
76
|
### Result Page
|
58
|
-

|
59
78
|
This is the main page of VectorDBBench, which displays the standard benchmark results we provide. Additionally, results of all tests performed by users themselves will also be shown here. We also offer the ability to select and compare results from multiple tests simultaneously.
|
60
79
|
|
61
80
|
The standard benchmark results displayed here include all 9 cases that we currently support for all our clients (Milvus, Zilliz Cloud, Elastic Search, Qdrant Cloud, and Weaviate Cloud). However, as some systems may not be able to complete all the tests successfully due to issues like Out of Memory (OOM) or timeouts, not all clients are included in every case.
|
81
|
+
|
82
|
+
All standard benchmark results are generated by a client running on an 8 core, 32 GB host, which is located in the same region as the server being tested. The client host is equipped with an `Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz` processor. Also all the servers for the open-source systems tested in our benchmarks run on hosts with the same type of processor.
|
62
83
|
### Run Test Page
|
63
84
|

|
64
85
|
This is the page to run a test:
|
@@ -88,17 +109,17 @@ We've developed an array of 9 comprehensive benchmark cases to test vector datab
|
|
88
109
|
- **Medium Dataset, High Filtering Rate:** This case uses a medium dataset (Cohere 1M vectors, 768 dimensions) with a high filtering rate.
|
89
110
|
For a quick reference, here is a table summarizing the key aspects of each case:
|
90
111
|
|
91
|
-
Case No. | Case Type | Dataset Size
|
92
|
-
|
93
|
-
1 | Capacity Case |
|
94
|
-
2 | Capacity Case |
|
95
|
-
3 | Search Performance Case |
|
96
|
-
4 | Search Performance Case |
|
97
|
-
5 | Search Performance Case |
|
98
|
-
6 | Filtering Search Performance Case |
|
99
|
-
7 | Filtering Search Performance Case |
|
100
|
-
8 | Filtering Search Performance Case |
|
101
|
-
9 | Filtering Search Performance Case |
|
112
|
+
Case No. | Case Type | Dataset Size | Filtering Rate | Results |
|
113
|
+
|----------|-----------|--------------|----------------|---------|
|
114
|
+
1 | Capacity Case | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
|
115
|
+
2 | Capacity Case | SIFT 100K vectors, 128 dimensions | N/A | Number of inserted vectors |
|
116
|
+
3 | Search Performance Case | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
117
|
+
4 | Search Performance Case | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
118
|
+
5 | Search Performance Case | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
119
|
+
6 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
120
|
+
7 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
121
|
+
8 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
122
|
+
9 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
102
123
|
|
103
124
|
Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
|
104
125
|
|
@@ -186,3 +207,64 @@ For the Client, we welcome any parameter tuning to obtain better results.
|
|
186
207
|
Many databases may not be able to complete all test cases due to issues such as Out of Memory (OOM), crashes, or timeouts. In these scenarios, we will clearly state these occurrences in the test results.
|
187
208
|
### Mistake Or Misrepresentation
|
188
209
|
We strive for accuracy in learning and supporting various vector databases, yet there might be oversights or misapplications. For any such occurrences, feel free to [raise an issue](https://github.com/zilliztech/VectorDBBench/issues/new) or make amendments on our GitHub page.
|
210
|
+
## Timeout
|
211
|
+
In our pursuit to ensure that our benchmark reflects the reality of a production environment while guaranteeing the practicality of the system, we have implemented a timeout plan based on our experiences for various tests.
|
212
|
+
|
213
|
+
**1. Capacity Case:**
|
214
|
+
- For Capacity Case, we have assigned an overall timeout.
|
215
|
+
|
216
|
+
**2. Other Cases:**
|
217
|
+
|
218
|
+
For other cases, we have set two timeouts:
|
219
|
+
|
220
|
+
- **Data Loading Timeout:** This timeout is designed to filter out systems that are too slow in inserting data, thus ensuring that we are only considering systems that are able to cope with the demands of a real-world production environment within a reasonable time frame.
|
221
|
+
|
222
|
+
- **Optimization Preparation Timeout**: This timeout is established to avoid excessive optimization strategies that might work for benchmarks but fail to deliver in real production environments. By doing this, we ensure that the systems we consider are not only suitable for testing environments but also applicable and efficient in production scenarios.
|
223
|
+
|
224
|
+
This multi-tiered timeout approach allows our benchmark to be more representative of actual production environments and assists us in identifying systems that can truly perform in real-world scenarios.
|
225
|
+
<table>
|
226
|
+
<tr>
|
227
|
+
<th>Case</th>
|
228
|
+
<th>Data Size</th>
|
229
|
+
<th>Timeout Type</th>
|
230
|
+
<th>Value</th>
|
231
|
+
</tr>
|
232
|
+
<tr>
|
233
|
+
<td>Capacity Case</td>
|
234
|
+
<td>N/A</td>
|
235
|
+
<td>Loading timeout</td>
|
236
|
+
<td>24 hours</td>
|
237
|
+
</tr>
|
238
|
+
<tr>
|
239
|
+
<td rowspan="2">Other Cases</td>
|
240
|
+
<td rowspan="2">1M vectors, 768 dimensions</td>
|
241
|
+
<td>Loading timeout</td>
|
242
|
+
<td>2.5 hours</td>
|
243
|
+
</tr>
|
244
|
+
<tr>
|
245
|
+
<td>Optimization timeout</td>
|
246
|
+
<td>15 mins</td>
|
247
|
+
</tr>
|
248
|
+
<tr>
|
249
|
+
<td rowspan="2">Other Cases</td>
|
250
|
+
<td rowspan="2">10M vectors, 768 dimensions</td>
|
251
|
+
<td>Loading timeout</td>
|
252
|
+
<td>25 hours</td>
|
253
|
+
</tr>
|
254
|
+
<tr>
|
255
|
+
<td>Optimization timeout</td>
|
256
|
+
<td>2.5 hours</td>
|
257
|
+
</tr>
|
258
|
+
<tr>
|
259
|
+
<td rowspan="2">Other Cases</td>
|
260
|
+
<td rowspan="2">100M vectors, 768 dimensions</td>
|
261
|
+
<td>Loading timeout</td>
|
262
|
+
<td>250 hours</td>
|
263
|
+
</tr>
|
264
|
+
<tr>
|
265
|
+
<td>Optimization timeout</td>
|
266
|
+
<td>25 hours</td>
|
267
|
+
</tr>
|
268
|
+
</table>
|
269
|
+
|
270
|
+
**Note:** Some datapoints in the standard benchmark results that violate this timeout will be kept for now for reference. We will remove them in the future.
|
@@ -0,0 +1,36 @@
|
|
1
|
+
from vectordb_bench.backend.dataset import Dataset
|
2
|
+
import logging
|
3
|
+
import pytest
|
4
|
+
from pydantic import ValidationError
|
5
|
+
|
6
|
+
|
7
|
+
log = logging.getLogger("vectordb_bench")
|
8
|
+
|
9
|
+
class TestDataSet:
|
10
|
+
def test_iter_dataset(self):
|
11
|
+
for ds in Dataset:
|
12
|
+
log.info(ds)
|
13
|
+
|
14
|
+
def test_cohere(self):
|
15
|
+
cohere = Dataset.COHERE.get(100_000)
|
16
|
+
log.info(cohere)
|
17
|
+
assert cohere.name == "Cohere"
|
18
|
+
assert cohere.size == 100_000
|
19
|
+
assert cohere.label == "SMALL"
|
20
|
+
assert cohere.dim == 768
|
21
|
+
|
22
|
+
def test_cohere_error(self):
|
23
|
+
with pytest.raises(ValidationError):
|
24
|
+
Dataset.COHERE.get(9999)
|
25
|
+
|
26
|
+
def test_init_cohere(self):
|
27
|
+
coheres = [Dataset.COHERE.manager(i) for i in [100_000, 1_000_000, 10_000_000]]
|
28
|
+
for t in coheres:
|
29
|
+
t._validate_local_file()
|
30
|
+
|
31
|
+
def test_iter_cohere(self):
|
32
|
+
cohere_10m = Dataset.COHERE.manager(10_000_000)
|
33
|
+
cohere_10m.prepare(False)
|
34
|
+
for i in cohere_10m:
|
35
|
+
log.debug(i.head(1))
|
36
|
+
|
@@ -3,32 +3,21 @@ from vectordb_bench.backend.cases import (
|
|
3
3
|
CaseType,
|
4
4
|
)
|
5
5
|
|
6
|
-
|
7
|
-
from pydantic.dataclasses import dataclass
|
8
|
-
|
9
|
-
@dataclass
|
10
|
-
class Cohere_S(ds.Cohere):
|
11
|
-
label: str = "SMALL"
|
12
|
-
size: int = 100_000
|
13
|
-
|
14
|
-
@dataclass
|
15
|
-
class Glove_S(ds.Glove):
|
16
|
-
label: str = "SMALL"
|
17
|
-
size : int = 100_000
|
6
|
+
from vectordb_bench.backend.datase import Dataset, DatasetManager
|
18
7
|
|
19
8
|
|
20
9
|
class Performance100K99p(PerformanceCase):
|
21
|
-
case_id: CaseType =
|
10
|
+
case_id: CaseType = 100
|
22
11
|
filter_rate: float | int | None = 0.99
|
23
|
-
dataset:
|
12
|
+
dataset: DatasetManager = Dataset.COHERE.manager(100_000)
|
24
13
|
name: str = "Filtering Search Performance Test (100K Dataset, 768 Dim, Filter 99%)"
|
25
14
|
description: str = """This case tests the search performance of a vector database with a small dataset (<b>Cohere 100K vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
|
26
15
|
Results will show index building time, recall, and maximum QPS."""
|
27
16
|
|
28
17
|
class Performance100K1p(PerformanceCase):
|
29
|
-
case_id: CaseType =
|
18
|
+
case_id: CaseType = 100
|
30
19
|
filter_rate: float | int | None = 0.01
|
31
|
-
dataset:
|
20
|
+
dataset: DatasetManager = Dataset.COHERE.manager(100_000)
|
32
21
|
name: str = "Filtering Search Performance Test (100K Dataset, 768 Dim, Filter 1%)"
|
33
22
|
description: str = (
|
34
23
|
"""This case tests the search performance of a vector database with a small dataset (<b>Cohere 100K vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
|
@@ -37,10 +26,8 @@ Results will show index building time, recall, and maximum QPS.""",
|
|
37
26
|
|
38
27
|
|
39
28
|
class Performance100K(PerformanceCase):
|
40
|
-
case_id: CaseType =
|
41
|
-
dataset:
|
29
|
+
case_id: CaseType = 100
|
30
|
+
dataset: DatasetManager = Dataset.COHERE.manager(100_000)
|
42
31
|
name: str = "Search Performance Test (100K Dataset, 768 Dim)"
|
43
32
|
description: str = """This case tests the search performance of a vector database with a small dataset (<b>Cohere 100K vectors</b>, 768 dimensions) at varying parallel levels.
|
44
33
|
Results will show index building time, recall, and maximum QPS."""
|
45
|
-
|
46
|
-
|
@@ -18,12 +18,23 @@ class config:
|
|
18
18
|
USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
|
19
19
|
|
20
20
|
RESULTS_LOCAL_DIR = pathlib.Path(__file__).parent.joinpath("results")
|
21
|
-
|
21
|
+
|
22
|
+
CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
|
23
|
+
LOAD_TIMEOUT_1M = 2.5 * 3600 # 2.5h
|
24
|
+
LOAD_TIMEOUT_10M = 25 * 3600 # 25h
|
25
|
+
LOAD_TIMEOUT_100M = 250 * 3600 # 10.41d
|
26
|
+
|
27
|
+
OPTIMIZE_TIMEOUT_1M = 15 * 60 # 15min
|
28
|
+
OPTIMIZE_TIMEOUT_10M = 2.5 * 3600 # 2.5h
|
29
|
+
OPTIMIZE_TIMEOUT_100M = 25 * 3600 # 1.04d
|
22
30
|
|
23
31
|
|
24
32
|
def display(self) -> str:
|
25
|
-
tmp = [
|
26
|
-
|
33
|
+
tmp = [
|
34
|
+
i for i in inspect.getmembers(self)
|
35
|
+
if not inspect.ismethod(i[1])
|
36
|
+
and not i[0].startswith('_')
|
37
|
+
and "TIMEOUT" not in i[0]
|
27
38
|
]
|
28
39
|
return tmp
|
29
40
|
|
@@ -2,8 +2,10 @@ import typing
|
|
2
2
|
import logging
|
3
3
|
from enum import Enum, auto
|
4
4
|
|
5
|
-
from
|
6
|
-
from
|
5
|
+
from vectordb_bench import config
|
6
|
+
from vectordb_bench.base import BaseModel
|
7
|
+
|
8
|
+
from .dataset import Dataset, DatasetManager
|
7
9
|
|
8
10
|
|
9
11
|
log = logging.getLogger(__name__)
|
@@ -44,7 +46,7 @@ class CaseType(Enum):
|
|
44
46
|
if c is not None:
|
45
47
|
return c().name
|
46
48
|
raise ValueError("Case unsupported")
|
47
|
-
|
49
|
+
|
48
50
|
@property
|
49
51
|
def case_description(self) -> str:
|
50
52
|
c = self.case_cls
|
@@ -73,7 +75,10 @@ class Case(BaseModel):
|
|
73
75
|
label: CaseLabel
|
74
76
|
name: str
|
75
77
|
description: str
|
76
|
-
dataset:
|
78
|
+
dataset: DatasetManager
|
79
|
+
|
80
|
+
load_timeout: float | int
|
81
|
+
optimize_timeout: float | int | None
|
77
82
|
|
78
83
|
filter_rate: float | None
|
79
84
|
|
@@ -92,6 +97,8 @@ class Case(BaseModel):
|
|
92
97
|
class CapacityCase(Case, BaseModel):
|
93
98
|
label: CaseLabel = CaseLabel.Load
|
94
99
|
filter_rate: float | None = None
|
100
|
+
load_timeout: float | int = config.CAPACITY_TIMEOUT_IN_SECONDS
|
101
|
+
optimize_timeout: float | int | None = None
|
95
102
|
|
96
103
|
|
97
104
|
class PerformanceCase(Case, BaseModel):
|
@@ -101,7 +108,7 @@ class PerformanceCase(Case, BaseModel):
|
|
101
108
|
|
102
109
|
class CapacityDim960(CapacityCase):
|
103
110
|
case_id: CaseType = CaseType.CapacityDim960
|
104
|
-
dataset:
|
111
|
+
dataset: DatasetManager = Dataset.GIST.manager(100_000)
|
105
112
|
name: str = "Capacity Test (960 Dim Repeated)"
|
106
113
|
description: str = """This case tests the vector database's loading capacity by repeatedly inserting large-dimension vectors (GIST 100K vectors, <b>960 dimensions</b>) until it is fully loaded.
|
107
114
|
Number of inserted vectors will be reported."""
|
@@ -109,7 +116,7 @@ Number of inserted vectors will be reported."""
|
|
109
116
|
|
110
117
|
class CapacityDim128(CapacityCase):
|
111
118
|
case_id: CaseType = CaseType.CapacityDim128
|
112
|
-
dataset:
|
119
|
+
dataset: DatasetManager = Dataset.SIFT.manager(500_000)
|
113
120
|
name: str = "Capacity Test (128 Dim Repeated)"
|
114
121
|
description: str = """This case tests the vector database's loading capacity by repeatedly inserting small-dimension vectors (SIFT 100K vectors, <b>128 dimensions</b>) until it is fully loaded.
|
115
122
|
Number of inserted vectors will be reported."""
|
@@ -117,64 +124,78 @@ Number of inserted vectors will be reported."""
|
|
117
124
|
|
118
125
|
class Performance10M(PerformanceCase):
|
119
126
|
case_id: CaseType = CaseType.Performance10M
|
120
|
-
dataset:
|
127
|
+
dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
|
121
128
|
name: str = "Search Performance Test (10M Dataset, 768 Dim)"
|
122
129
|
description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) at varying parallel levels.
|
123
130
|
Results will show index building time, recall, and maximum QPS."""
|
131
|
+
load_timeout: float | int = config.LOAD_TIMEOUT_10M
|
132
|
+
optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_10M
|
124
133
|
|
125
134
|
|
126
135
|
class Performance1M(PerformanceCase):
|
127
136
|
case_id: CaseType = CaseType.Performance1M
|
128
|
-
dataset:
|
137
|
+
dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
|
129
138
|
name: str = "Search Performance Test (1M Dataset, 768 Dim)"
|
130
139
|
description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) at varying parallel levels.
|
131
140
|
Results will show index building time, recall, and maximum QPS."""
|
141
|
+
load_timeout: float | int = config.LOAD_TIMEOUT_1M
|
142
|
+
optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1M
|
132
143
|
|
133
144
|
|
134
145
|
class Performance10M1P(PerformanceCase):
|
135
146
|
case_id: CaseType = CaseType.Performance10M1P
|
136
147
|
filter_rate: float | int | None = 0.01
|
137
|
-
dataset:
|
148
|
+
dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
|
138
149
|
name: str = "Filtering Search Performance Test (10M Dataset, 768 Dim, Filter 1%)"
|
139
150
|
description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
|
140
151
|
Results will show index building time, recall, and maximum QPS."""
|
152
|
+
load_timeout: float | int = config.LOAD_TIMEOUT_10M
|
153
|
+
optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_10M
|
141
154
|
|
142
155
|
|
143
156
|
class Performance1M1P(PerformanceCase):
|
144
157
|
case_id: CaseType = CaseType.Performance1M1P
|
145
158
|
filter_rate: float | int | None = 0.01
|
146
|
-
dataset:
|
159
|
+
dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
|
147
160
|
name: str = "Filtering Search Performance Test (1M Dataset, 768 Dim, Filter 1%)"
|
148
161
|
description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
|
149
162
|
Results will show index building time, recall, and maximum QPS."""
|
163
|
+
load_timeout: float | int = config.LOAD_TIMEOUT_1M
|
164
|
+
optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1M
|
150
165
|
|
151
166
|
|
152
167
|
class Performance10M99P(PerformanceCase):
|
153
168
|
case_id: CaseType = CaseType.Performance10M99P
|
154
169
|
filter_rate: float | int | None = 0.99
|
155
|
-
dataset:
|
170
|
+
dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
|
156
171
|
name: str = "Filtering Search Performance Test (10M Dataset, 768 Dim, Filter 99%)"
|
157
172
|
description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
|
158
173
|
Results will show index building time, recall, and maximum QPS."""
|
174
|
+
load_timeout: float | int = config.LOAD_TIMEOUT_10M
|
175
|
+
optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_10M
|
159
176
|
|
160
177
|
|
161
178
|
class Performance1M99P(PerformanceCase):
|
162
179
|
case_id: CaseType = CaseType.Performance1M99P
|
163
180
|
filter_rate: float | int | None = 0.99
|
164
|
-
dataset:
|
181
|
+
dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
|
165
182
|
name: str = "Filtering Search Performance Test (1M Dataset, 768 Dim, Filter 99%)"
|
166
183
|
description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
|
167
184
|
Results will show index building time, recall, and maximum QPS."""
|
185
|
+
load_timeout: float | int = config.LOAD_TIMEOUT_1M
|
186
|
+
optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1M
|
168
187
|
|
169
188
|
|
170
189
|
|
171
190
|
class Performance100M(PerformanceCase):
|
172
191
|
case_id: CaseType = CaseType.Performance100M
|
173
192
|
filter_rate: float | int | None = None
|
174
|
-
dataset:
|
193
|
+
dataset: DatasetManager = Dataset.LAION.manager(100_000_000)
|
175
194
|
name: str = "Search Performance Test (100M Dataset, 768 Dim)"
|
176
195
|
description: str = """This case tests the search performance of a vector database with a large 100M dataset (<b>LAION 100M vectors</b>, 768 dimensions), at varying parallel levels.
|
177
196
|
Results will show index building time, recall, and maximum QPS."""
|
197
|
+
load_timeout: float | int = config.LOAD_TIMEOUT_100M
|
198
|
+
optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_100M
|
178
199
|
|
179
200
|
|
180
201
|
type2case = {
|
@@ -15,7 +15,7 @@ from .pinecone.pinecone import Pinecone
|
|
15
15
|
from .weaviate_cloud.weaviate_cloud import WeaviateCloud
|
16
16
|
from .qdrant_cloud.qdrant_cloud import QdrantCloud
|
17
17
|
from .zilliz_cloud.zilliz_cloud import ZillizCloud
|
18
|
-
|
18
|
+
from .pgvector.pgvector import PgVector
|
19
19
|
|
20
20
|
class DB(Enum):
|
21
21
|
"""Database types
|
@@ -35,6 +35,7 @@ class DB(Enum):
|
|
35
35
|
ElasticCloud = "ElasticCloud"
|
36
36
|
QdrantCloud = "QdrantCloud"
|
37
37
|
WeaviateCloud = "WeaviateCloud"
|
38
|
+
PgVector = "PgVector"
|
38
39
|
|
39
40
|
|
40
41
|
@property
|
@@ -49,8 +50,12 @@ db2client = {
|
|
49
50
|
DB.ElasticCloud: ElasticCloud,
|
50
51
|
DB.QdrantCloud: QdrantCloud,
|
51
52
|
DB.Pinecone: Pinecone,
|
53
|
+
DB.PgVector: PgVector
|
52
54
|
}
|
53
55
|
|
56
|
+
for db in DB:
|
57
|
+
assert issubclass(db.init_cls, VectorDB)
|
58
|
+
|
54
59
|
|
55
60
|
__all__ = [
|
56
61
|
"DB", "VectorDB", "DBConfig", "DBCaseConfig", "IndexType", "MetricType", "EmptyDBCaseConfig",
|