vectordb-bench 0.0.1__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. vectordb-bench-0.0.3/.env.example +11 -0
  2. vectordb-bench-0.0.3/.github/workflows/publish_package_on_release.yml +46 -0
  3. vectordb-bench-0.0.3/.gitignore +11 -0
  4. vectordb-bench-0.0.3/.ruff.toml +49 -0
  5. {vectordb-bench-0.0.1/vectordb_bench.egg-info → vectordb-bench-0.0.3}/PKG-INFO +104 -27
  6. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/README.md +103 -26
  7. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/pyproject.toml +7 -2
  8. vectordb-bench-0.0.3/tests/conftest.py +4 -0
  9. vectordb-bench-0.0.3/tests/pytest.ini +4 -0
  10. vectordb-bench-0.0.3/tests/test_dataset.py +36 -0
  11. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/tests/test_models.py +2 -2
  12. vectordb-bench-0.0.3/tests/ut_cases.py +33 -0
  13. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/__init__.py +14 -3
  14. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/assembler.py +2 -2
  15. vectordb-bench-0.0.3/vectordb_bench/backend/cases.py +213 -0
  16. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/__init__.py +6 -1
  17. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/api.py +23 -11
  18. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
  19. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +11 -9
  20. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/milvus/config.py +2 -3
  21. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/milvus/milvus.py +32 -19
  22. vectordb-bench-0.0.3/vectordb_bench/backend/clients/pgvector/config.py +49 -0
  23. vectordb-bench-0.0.3/vectordb_bench/backend/clients/pgvector/pgvector.py +171 -0
  24. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/pinecone/config.py +3 -3
  25. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/pinecone/pinecone.py +19 -13
  26. vectordb-bench-0.0.3/vectordb_bench/backend/clients/qdrant_cloud/config.py +33 -0
  27. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +12 -13
  28. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/weaviate_cloud/config.py +3 -3
  29. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +9 -8
  30. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/zilliz_cloud/config.py +5 -4
  31. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +3 -1
  32. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/dataset.py +100 -162
  33. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/result_collector.py +2 -2
  34. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/runner/mp_runner.py +29 -13
  35. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/runner/serial_runner.py +98 -36
  36. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/task_runner.py +43 -48
  37. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/charts.py +10 -21
  38. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/data.py +31 -15
  39. vectordb-bench-0.0.3/vectordb_bench/frontend/components/check_results/expanderStyle.py +37 -0
  40. vectordb-bench-0.0.3/vectordb_bench/frontend/components/check_results/filters.py +125 -0
  41. vectordb-bench-0.0.3/vectordb_bench/frontend/components/check_results/footer.py +8 -0
  42. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/headerIcon.py +8 -4
  43. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/nav.py +7 -6
  44. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/check_results/priceTable.py +3 -2
  45. vectordb-bench-0.0.3/vectordb_bench/frontend/components/check_results/stPageConfig.py +18 -0
  46. vectordb-bench-0.0.3/vectordb_bench/frontend/components/get_results/saveAsImage.py +50 -0
  47. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/autoRefresh.py +1 -1
  48. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/caseSelector.py +19 -16
  49. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +20 -7
  50. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/dbSelector.py +5 -5
  51. vectordb-bench-0.0.3/vectordb_bench/frontend/components/run_test/hideSidebar.py +8 -0
  52. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/submitTask.py +16 -10
  53. vectordb-bench-0.0.3/vectordb_bench/frontend/const/dbCaseConfigs.py +291 -0
  54. vectordb-bench-0.0.3/vectordb_bench/frontend/const/dbPrices.py +6 -0
  55. vectordb-bench-0.0.3/vectordb_bench/frontend/const/styles.py +58 -0
  56. vectordb-bench-0.0.1/vectordb_bench/frontend/pages/qps_with_price.py → vectordb-bench-0.0.3/vectordb_bench/frontend/pages/quries_per_dollar.py +24 -17
  57. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/pages/run_test.py +17 -11
  58. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/vdb_benchmark.py +19 -12
  59. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/metric.py +19 -10
  60. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/models.py +14 -40
  61. vectordb-bench-0.0.3/vectordb_bench/results/dbPrices.json +32 -0
  62. vectordb-bench-0.0.3/vectordb_bench/results/getLeaderboardData.py +52 -0
  63. vectordb-bench-0.0.3/vectordb_bench/results/leaderboard.json +1 -0
  64. vectordb-bench-0.0.1/vectordb_bench/results/result_20230609_standard.json → vectordb-bench-0.0.3/vectordb_bench/results/result_20230705_standard.json +1910 -897
  65. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3/vectordb_bench.egg-info}/PKG-INFO +104 -27
  66. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/SOURCES.txt +21 -3
  67. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/requires.txt +3 -0
  68. vectordb-bench-0.0.1/tests/test_dataset.py +0 -53
  69. vectordb-bench-0.0.1/vectordb_bench/backend/cases.py +0 -124
  70. vectordb-bench-0.0.1/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -16
  71. vectordb-bench-0.0.1/vectordb_bench/frontend/components/check_results/filters.py +0 -97
  72. vectordb-bench-0.0.1/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -10
  73. vectordb-bench-0.0.1/vectordb_bench/frontend/const.py +0 -391
  74. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/LICENSE +0 -0
  75. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/setup.cfg +0 -0
  76. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/tests/test_bench_runner.py +0 -0
  77. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/tests/test_elasticsearch_cloud.py +0 -0
  78. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/tests/test_utils.py +0 -0
  79. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/__main__.py +0 -0
  80. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/__init__.py +0 -0
  81. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/runner/__init__.py +0 -0
  82. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/backend/utils.py +0 -0
  83. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/base.py +0 -0
  84. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
  85. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/frontend/utils.py +0 -0
  86. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/interface.py +0 -0
  87. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench/log_util.py +0 -0
  88. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  89. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/entry_points.txt +0 -0
  90. {vectordb-bench-0.0.1 → vectordb-bench-0.0.3}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -0,0 +1,11 @@
+ # LOG_LEVEL=
+ # LOG_PATH=
+ # LOG_NAME=
+ # TIMEZONE=
+
+ # NUM_PER_BATCH=
+ # DEFAULT_DATASET_URL=
+
+ DATASET_LOCAL_DIR="/tmp/vector_db_bench/dataset"
+
+ # DROP_OLD = True
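
> The `.env.example` above only lists the available knobs. For orientation, here is a minimal sketch of how such settings are commonly consumed — assuming `python-dotenv`; the variable names come from the file above, but the defaults are illustrative and the package's actual config loader may differ:

```python
# Illustrative only: consuming a .env file like the one above.
# Defaults here are assumptions, not the package's real ones.
import os
from pathlib import Path

try:
    from dotenv import load_dotenv  # pip install python-dotenv
    load_dotenv()  # merge .env values into os.environ
except ImportError:
    pass  # fall back to plain environment variables

LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
NUM_PER_BATCH = int(os.environ.get("NUM_PER_BATCH", "100"))
DATASET_LOCAL_DIR = Path(os.environ.get("DATASET_LOCAL_DIR", "/tmp/vector_db_bench/dataset"))
DROP_OLD = os.environ.get("DROP_OLD", "True").strip().lower() == "true"
```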
@@ -0,0 +1,46 @@
+ name: Publish Python 🐍 distributions 📦 to TestPyPI
+
+ on:
+   release:
+     types: [published]
+
+ jobs:
+   build-n-publish:
+     name: Build and publish Python 🐍 distributions 📦 to PyPI
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Check out from Git
+         uses: actions/checkout@v3
+       - name: Get history and tags for SCM versioning
+         run: |
+           git fetch --prune --unshallow
+           git fetch --depth=1 origin +refs/tags/*:refs/tags/*
+       - name: Set up Python 3.11
+         uses: actions/setup-python@v4
+         with:
+           python-version: 3.11
+       - name: Install pypa/build
+         run: >-
+           python -m
+           pip install
+           build
+           --user
+       - name: Build a binary wheel and a source tarball
+         run: >-
+           python -m
+           build
+           --sdist
+           --wheel
+           --outdir dist/
+           .
+       - name: Publish distribution 📦 to Test PyPI
+         uses: pypa/gh-action-pypi-publish@release/v1
+         with:
+           password: ${{ secrets.TEST_PYPI_API_TOKEN }}
+           repository-url: https://test.pypi.org/legacy/
+       - name: Publish distribution 📦 to PyPI
+         if: startsWith(github.ref, 'refs/tags')
+         uses: pypa/gh-action-pypi-publish@release/v1
+         with:
+           password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,11 @@
+ *.sw[op]
+ *.egg-info
+ dist/
+ __pycache__
+ .env
+ .data/
+ __MACOSX
+ .DS_Store
+ build/
+ venv/
+ .idea/
@@ -0,0 +1,49 @@
+ # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
+ # Enable flake8-bugbear (`B`) rules.
+ select = ["E", "F", "B"]
+ ignore = [
+     "E501",  # line length violations
+ ]
+
+ # Allow autofix for all enabled rules (when `--fix` is provided).
+ fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM",
+     "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT",
+     "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT",
+ ]
+ unfixable = []
+
+ # Exclude a variety of commonly ignored directories.
+ exclude = [
+     ".bzr",
+     ".direnv",
+     ".eggs",
+     ".git",
+     ".hg",
+     ".mypy_cache",
+     ".nox",
+     ".pants.d",
+     ".pytype",
+     ".ruff_cache",
+     ".svn",
+     ".tox",
+     ".venv",
+     "__pypackages__",
+     "_build",
+     "buck-out",
+     "build",
+     "dist",
+     "node_modules",
+     "venv",
+     "__pycache__",
+     "__init__.py",
+ ]
+
+ # Allow unused variables when underscore-prefixed.
+ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+ # Assume Python 3.11.
+ target-version = "py311"
+
+ [mccabe]
+ # Unlike Flake8, default to a complexity level of 10.
+ max-complexity = 10
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vectordb-bench
- Version: 0.0.1
+ Version: 0.0.3
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -14,6 +14,10 @@ License-File: LICENSE

  # VectorDBBench: A Benchmark Tool for VectorDB

+ [![version](https://img.shields.io/pypi/v/vectordb-bench.svg?color=blue)](https://pypi.org/project/vectordb-bench/)
+ [![Downloads](https://pepy.tech/badge/vectordb-bench)](https://pepy.tech/project/vectordb-bench)
+
+ **Leaderboard:** https://zilliz.com/benchmark
  ## Quick Start
  ### Prerequirement
  ``` shell
@@ -34,10 +38,28 @@ VectorDBBench is not just an offering of benchmark results for mainstream vector
  Understanding the importance of user experience, we provide an intuitive visual interface. This not only empowers users to initiate benchmarks at ease, but also to view comparative result reports, thereby reproducing benchmark results effortlessly.
  To add more relevance and practicality, we provide cost-effectiveness reports particularly for cloud services. This allows for a more realistic and applicable benchmarking process.

- Closely mimicking real-world production environments, we've set up diverse testing scenarios including insertion, searching, and filtered searching. To provide you with credible and reliable data, we've included public datasets from actual production scenarios, such as SIFT, GIST, Cohere, and more. It's fascinating to discover how a relatively unknown open-source database might excel in certain circumstances!
+ Closely mimicking real-world production environments, we've set up diverse testing scenarios including insertion, searching, and filtered searching. To provide you with credible and reliable data, we've included public datasets from actual production scenarios, such as [SIFT](http://corpus-texmex.irisa.fr/), [GIST](http://corpus-texmex.irisa.fr/), [Cohere](https://huggingface.co/datasets/Cohere/wikipedia-22-12/tree/main/en), and more. It's fascinating to discover how a relatively unknown open-source database might excel in certain circumstances!

  Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.

+ ## Leaderboard
+ ### Introduction
+ To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
+
+ ### Scoring Rules
+
+ 1. For each case, select a base value and score each system based on relative values.
+     - For QPS and QP$, we use the highest value as the reference, denoted as `base_QPS` or `base_QP$`, and the score of each system is `(QPS/base_QPS) * 100` or `(QP$/base_QP$) * 100`.
+     - For Latency, we use the lowest value as the reference, that is, `base_Latency`, and the score of each system is `(Latency + 10ms)/(base_Latency + 10ms)`.
+
+ We want to give equal weight to different cases, and not let a case with high absolute result values become the sole reason for the overall scoring. Therefore, when scoring different systems in each case, we need to use relative values.
+
+ Also, for Latency, we add 10ms to the numerator and denominator to ensure that if every system performs particularly well in a case, its advantage will not be infinitely magnified when latency tends to 0.
+
+ 2. For systems that fail or time out in a particular case, we will give them a score based on a value worse than the worst result by a factor of two. For example, in QPS or QP$, it would be half the lowest value. For Latency, it would be twice the maximum value.
+
+ 3. For each system, we will take the geometric mean of its scores in all cases as its comprehensive score for a particular metric.
+
  ## Build on your own
  ### Install requirements
  ``` shell
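
> To make the scoring rules above concrete, here is a short Python sketch of the computation. The function and variable names are illustrative, not the package's actual API; it only mirrors the arithmetic stated in the rules:

```python
# Illustrative sketch of the leaderboard scoring rules; names are hypothetical
# and this is not the package's real implementation.
import math

def qps_scores(qps_by_system: dict[str, float]) -> dict[str, float]:
    """Rule 1 for QPS (or QP$): score relative to the best system in the case."""
    base = max(qps_by_system.values())
    return {name: (qps / base) * 100 for name, qps in qps_by_system.items()}

def latency_scores(latency_ms_by_system: dict[str, float]) -> dict[str, float]:
    """Rule 1 for latency: relative to the lowest latency, padded by 10ms."""
    base = min(latency_ms_by_system.values())
    return {
        name: (latency + 10) / (base + 10)
        for name, latency in latency_ms_by_system.items()
    }

def comprehensive_score(case_scores: list[float]) -> float:
    """Rule 3: geometric mean of a system's per-case scores."""
    return math.prod(case_scores) ** (1 / len(case_scores))

# Rule 2: a failed or timed-out run is assigned a value twice as bad as the
# worst observed result (e.g. half the lowest QPS) before being scored.
```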
@@ -66,12 +88,14 @@ $ ruff check vectordb_bench --fix

  ## How does it work?
  ### Result Page
- ![image](https://github.com/liliu-z/VectorDBBench/assets/105927039/a8418fb6-0822-4f04-a04d-ab9143815b3e)
+ ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/7f5cdae7-f9f2-4a81-b2e0-e5c6268cd970)
  This is the main page of VectorDBBench, which displays the standard benchmark results we provide. Additionally, results of all tests performed by users themselves will also be shown here. We also offer the ability to select and compare results from multiple tests simultaneously.

- The standard benchmark results displayed here include all 12 cases that we currently support for all our clients (Milvus, Zilliz Cloud, Elastic Search, Qdrant Cloud, and Weaviate Cloud). However, as some systems may not be able to complete all the tests successfully due to issues like Out of Memory (OOM) or timeouts, not all clients are included in every case.
+ The standard benchmark results displayed here include all 9 cases that we currently support for all our clients (Milvus, Zilliz Cloud, Elastic Search, Qdrant Cloud, and Weaviate Cloud). However, as some systems may not be able to complete all the tests successfully due to issues like Out of Memory (OOM) or timeouts, not all clients are included in every case.
+
+ All standard benchmark results are generated by a client running on an 8-core, 32 GB host located in the same region as the server under test. The client host is equipped with an `Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz` processor. All the servers for the open-source systems tested in our benchmarks also run on hosts with the same type of processor.
  ### Run Test Page
- ![image](https://github.com/liliu-z/VectorDBBench/assets/105927039/364e2462-107e-4ce1-aabc-ff2b217ae9b7)
+ ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/a789099a-3707-4214-8052-b73463b8f2c6)
  This is the page to run a test:
  1. Initially, you select the systems to be tested - multiple selections are allowed. Once selected, corresponding forms will pop up to gather necessary information for using the chosen databases. The db_label is used to differentiate different instances of the same system. We recommend filling in the host size or instance type here (as we do in our standard results).
  2. The next step is to select the test cases you want to perform. You can select multiple cases at once, and a form to collect corresponding parameters will appear.
@@ -80,11 +104,11 @@ Now we can only run one task at the same time.

  ## Module
  ### Code Structure
- ![image](https://github.com/liliu-z/VectorDBBench/assets/105927039/8d65c8b4-b9a3-4405-9db8-d27bf1ffda4b)
+ ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/8c06512e-5419-4381-b084-9c93aed59639)
  ### Client
  Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant, and Weaviate. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
  ### Benchmark Cases
- We've developed an array of 12 comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into three main types:
+ We've developed an array of 9 comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into three main types:
  #### Capacity Case
  - **Large Dim:** Tests the database's loading capacity by inserting large-dimension vectors (GIST 100K vectors, 960 dimensions) until fully loaded. The final number of inserted vectors is reported.
  - **Small Dim:** Similar to the Large Dim case but uses small-dimension vectors (SIFT 100K vectors, 128 dimensions).
@@ -92,30 +116,24 @@ We've developed an array of 12 comprehensive benchmark cases to test vector data
  - **XLarge Dataset:** Measures search performance with a massive dataset (LAION 100M vectors, 768 dimensions) at varying parallel levels. The results include index building time, recall, latency, and maximum QPS.
  - **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (Cohere 10M vectors, 768 dimensions).
  - **Medium Dataset:** A case using a medium dataset (Cohere 1M vectors, 768 dimensions).
- - **Small Dataset:** This case uses a small dataset (Cohere 100K vectors, 768 dimensions).
  #### Filtering Search Performance Case
  - **Large Dataset, Low Filtering Rate:** Evaluates search performance with a large dataset (Cohere 10M vectors, 768 dimensions) under a low filtering rate (1% vectors) at different parallel levels.
  - **Medium Dataset, Low Filtering Rate:** This case uses a medium dataset (Cohere 1M vectors, 768 dimensions) with a similar low filtering rate.
- - **Small Dataset, Low Filtering Rate:** This case uses a small dataset (Cohere 100K vectors, 768 dimensions) with a low filtering rate.
  - **Large Dataset, High Filtering Rate:** It tests with a large dataset (Cohere 10M vectors, 768 dimensions) but under a high filtering rate (99% vectors).
  - **Medium Dataset, High Filtering Rate:** This case uses a medium dataset (Cohere 1M vectors, 768 dimensions) with a high filtering rate.
- - **Small Dataset, High Filtering Rate:** Finally, this case uses a small dataset (Cohere 100K vectors, 768 dimensions) under a high filtering rate.
  For a quick reference, here is a table summarizing the key aspects of each case:

- | Case No. | Case Type | Dataset Size | Dataset Type | Filtering Rate | Results |
- |----------|-----------|--------------|--------------|----------------|---------|
- | 1 | Capacity Case | Large Dim | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
- | 2 | Capacity Case | Small Dim | SIFT 100K vectors, 128 dimensions | N/A | Number of inserted vectors |
- | 3 | Search Performance Case | XLarge Dataset | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
- | 4 | Search Performance Case | Large Dataset | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
- | 5 | Search Performance Case | Medium Dataset | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
- | 6 | Search Performance Case | Small Dataset | Cohere 100K vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
- | 7 | Filtering Search Performance Case | Large Dataset, Low Filtering Rate | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
- | 8 | Filtering Search Performance Case | Medium Dataset, Low Filtering Rate | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
- | 9 | Filtering Search Performance Case | Small Dataset, Low Filtering Rate | Cohere 100K vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
- | 10 | Filtering Search Performance Case | Large Dataset, High Filtering Rate | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
- | 11 | Filtering Search Performance Case | Medium Dataset, High Filtering Rate | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
- | 12 | Filtering Search Performance Case | Small Dataset, High Filtering Rate | Cohere 100K vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
+ | Case No. | Case Type | Dataset Size | Filtering Rate | Results |
+ |----------|-----------|--------------|----------------|---------|
+ | 1 | Capacity Case | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
+ | 2 | Capacity Case | SIFT 100K vectors, 128 dimensions | N/A | Number of inserted vectors |
+ | 3 | Search Performance Case | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
+ | 4 | Search Performance Case | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
+ | 5 | Search Performance Case | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
+ | 6 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
+ | 7 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
+ | 8 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
+ | 9 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |

  Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
@@ -136,7 +154,7 @@ VectorDBBench aims to provide a more comprehensive, multi-faceted testing enviro

  1. Navigate to the vectordb_bench/backend/clients directory.
  2. Create a new folder for your client, for example, "new_client".
- 3. Inside the "new_client" folder, create two files: new_client.py and config.py.
+ 3. Inside the "new_client" folder, create two files: new_client.py and config.py.

  **Step 2: Implement new_client.py and config.py**
@@ -201,7 +219,66 @@ For the system under test, we use the default server-side configuration to maint
  For the Client, we welcome any parameter tuning to obtain better results.
  ### Incomplete Results
  Many databases may not be able to complete all test cases due to issues such as Out of Memory (OOM), crashes, or timeouts. In these scenarios, we will clearly state these occurrences in the test results.
- ### Unpublishable Results
- Although we are trying to support as many clients as possible for benchmarking, due to the restrictions imposed by the [Dewitt Clause](https://cube.dev/blog/dewitt-clause-or-can-you-benchmark-a-database), we're unable to publish all benchmark results. This means that users may not be able to fully compare performance data for certain databases on our platform, despite the support we have integrated for these systems.
  ### Mistake Or Misrepresentation
  We strive for accuracy in learning and supporting various vector databases, yet there might be oversights or misapplications. For any such occurrences, feel free to [raise an issue](https://github.com/zilliztech/VectorDBBench/issues/new) or make amendments on our GitHub page.
+ ## Timeout
+ In our pursuit to ensure that our benchmark reflects the reality of a production environment while guaranteeing the practicality of the system, we have implemented a timeout plan based on our experiences for various tests.
+
+ **1. Capacity Case:**
+ - For Capacity Case, we have assigned an overall timeout.
+
+ **2. Other Cases:**
+
+ For other cases, we have set two timeouts:
+
+ - **Data Loading Timeout:** This timeout is designed to filter out systems that are too slow in inserting data, thus ensuring that we are only considering systems that are able to cope with the demands of a real-world production environment within a reasonable time frame.
+
+ - **Optimization Preparation Timeout:** This timeout is established to avoid excessive optimization strategies that might work for benchmarks but fail to deliver in real production environments. By doing this, we ensure that the systems we consider are not only suitable for testing environments but also applicable and efficient in production scenarios.
+
+ This multi-tiered timeout approach allows our benchmark to be more representative of actual production environments and assists us in identifying systems that can truly perform in real-world scenarios.
+ <table>
+     <tr>
+         <th>Case</th>
+         <th>Data Size</th>
+         <th>Timeout Type</th>
+         <th>Value</th>
+     </tr>
+     <tr>
+         <td>Capacity Case</td>
+         <td>N/A</td>
+         <td>Loading timeout</td>
+         <td>24 hours</td>
+     </tr>
+     <tr>
+         <td rowspan="2">Other Cases</td>
+         <td rowspan="2">1M vectors, 768 dimensions</td>
+         <td>Loading timeout</td>
+         <td>2.5 hours</td>
+     </tr>
+     <tr>
+         <td>Optimization timeout</td>
+         <td>15 mins</td>
+     </tr>
+     <tr>
+         <td rowspan="2">Other Cases</td>
+         <td rowspan="2">10M vectors, 768 dimensions</td>
+         <td>Loading timeout</td>
+         <td>25 hours</td>
+     </tr>
+     <tr>
+         <td>Optimization timeout</td>
+         <td>2.5 hours</td>
+     </tr>
+     <tr>
+         <td rowspan="2">Other Cases</td>
+         <td rowspan="2">100M vectors, 768 dimensions</td>
+         <td>Loading timeout</td>
+         <td>250 hours</td>
+     </tr>
+     <tr>
+         <td>Optimization timeout</td>
+         <td>25 hours</td>
+     </tr>
+ </table>
+
+ **Note:** Some datapoints in the standard benchmark results that violate this timeout will be kept for now for reference. We will remove them in the future.
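
> For readers who want the gist of the two-tier timeouts in code, here is a hedged sketch of enforcing a per-stage deadline. The thresholds mirror the 1M-vector row of the table above; the function names are illustrative and this is not how the package necessarily implements it:

```python
# Hypothetical sketch of enforcing per-stage timeouts; illustrative only.
from concurrent.futures import ProcessPoolExecutor, TimeoutError

LOAD_TIMEOUT_1M = 2.5 * 3600   # seconds, per the table above
OPTIMIZE_TIMEOUT_1M = 15 * 60  # seconds

def run_with_timeout(stage_fn, timeout_s: float):
    """Run one stage (loading or optimization) in a worker process,
    failing the case if it overruns its deadline."""
    with ProcessPoolExecutor(max_workers=1) as pool:
        future = pool.submit(stage_fn)
        try:
            return future.result(timeout=timeout_s)
        except TimeoutError:
            raise RuntimeError(f"stage exceeded its {timeout_s:.0f}s timeout")
```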
@@ -1,5 +1,9 @@
  # VectorDBBench: A Benchmark Tool for VectorDB

+ [![version](https://img.shields.io/pypi/v/vectordb-bench.svg?color=blue)](https://pypi.org/project/vectordb-bench/)
+ [![Downloads](https://pepy.tech/badge/vectordb-bench)](https://pepy.tech/project/vectordb-bench)
+
+ **Leaderboard:** https://zilliz.com/benchmark
  ## Quick Start
  ### Prerequirement
  ``` shell
@@ -20,10 +24,28 @@ VectorDBBench is not just an offering of benchmark results for mainstream vector
  Understanding the importance of user experience, we provide an intuitive visual interface. This not only empowers users to initiate benchmarks at ease, but also to view comparative result reports, thereby reproducing benchmark results effortlessly.
  To add more relevance and practicality, we provide cost-effectiveness reports particularly for cloud services. This allows for a more realistic and applicable benchmarking process.

- Closely mimicking real-world production environments, we've set up diverse testing scenarios including insertion, searching, and filtered searching. To provide you with credible and reliable data, we've included public datasets from actual production scenarios, such as SIFT, GIST, Cohere, and more. It's fascinating to discover how a relatively unknown open-source database might excel in certain circumstances!
+ Closely mimicking real-world production environments, we've set up diverse testing scenarios including insertion, searching, and filtered searching. To provide you with credible and reliable data, we've included public datasets from actual production scenarios, such as [SIFT](http://corpus-texmex.irisa.fr/), [GIST](http://corpus-texmex.irisa.fr/), [Cohere](https://huggingface.co/datasets/Cohere/wikipedia-22-12/tree/main/en), and more. It's fascinating to discover how a relatively unknown open-source database might excel in certain circumstances!

  Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.

+ ## Leaderboard
+ ### Introduction
+ To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
+
+ ### Scoring Rules
+
+ 1. For each case, select a base value and score each system based on relative values.
+     - For QPS and QP$, we use the highest value as the reference, denoted as `base_QPS` or `base_QP$`, and the score of each system is `(QPS/base_QPS) * 100` or `(QP$/base_QP$) * 100`.
+     - For Latency, we use the lowest value as the reference, that is, `base_Latency`, and the score of each system is `(Latency + 10ms)/(base_Latency + 10ms)`.
+
+ We want to give equal weight to different cases, and not let a case with high absolute result values become the sole reason for the overall scoring. Therefore, when scoring different systems in each case, we need to use relative values.
+
+ Also, for Latency, we add 10ms to the numerator and denominator to ensure that if every system performs particularly well in a case, its advantage will not be infinitely magnified when latency tends to 0.
+
+ 2. For systems that fail or time out in a particular case, we will give them a score based on a value worse than the worst result by a factor of two. For example, in QPS or QP$, it would be half the lowest value. For Latency, it would be twice the maximum value.
+
+ 3. For each system, we will take the geometric mean of its scores in all cases as its comprehensive score for a particular metric.
+
  ## Build on your own
  ### Install requirements
  ``` shell
@@ -52,12 +74,14 @@ $ ruff check vectordb_bench --fix

  ## How does it work?
  ### Result Page
- ![image](https://github.com/liliu-z/VectorDBBench/assets/105927039/a8418fb6-0822-4f04-a04d-ab9143815b3e)
+ ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/7f5cdae7-f9f2-4a81-b2e0-e5c6268cd970)
  This is the main page of VectorDBBench, which displays the standard benchmark results we provide. Additionally, results of all tests performed by users themselves will also be shown here. We also offer the ability to select and compare results from multiple tests simultaneously.

- The standard benchmark results displayed here include all 12 cases that we currently support for all our clients (Milvus, Zilliz Cloud, Elastic Search, Qdrant Cloud, and Weaviate Cloud). However, as some systems may not be able to complete all the tests successfully due to issues like Out of Memory (OOM) or timeouts, not all clients are included in every case.
+ The standard benchmark results displayed here include all 9 cases that we currently support for all our clients (Milvus, Zilliz Cloud, Elastic Search, Qdrant Cloud, and Weaviate Cloud). However, as some systems may not be able to complete all the tests successfully due to issues like Out of Memory (OOM) or timeouts, not all clients are included in every case.
+
+ All standard benchmark results are generated by a client running on an 8-core, 32 GB host located in the same region as the server under test. The client host is equipped with an `Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz` processor. All the servers for the open-source systems tested in our benchmarks also run on hosts with the same type of processor.
  ### Run Test Page
- ![image](https://github.com/liliu-z/VectorDBBench/assets/105927039/364e2462-107e-4ce1-aabc-ff2b217ae9b7)
+ ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/a789099a-3707-4214-8052-b73463b8f2c6)
  This is the page to run a test:
  1. Initially, you select the systems to be tested - multiple selections are allowed. Once selected, corresponding forms will pop up to gather necessary information for using the chosen databases. The db_label is used to differentiate different instances of the same system. We recommend filling in the host size or instance type here (as we do in our standard results).
  2. The next step is to select the test cases you want to perform. You can select multiple cases at once, and a form to collect corresponding parameters will appear.
@@ -66,11 +90,11 @@ Now we can only run one task at the same time.

  ## Module
  ### Code Structure
- ![image](https://github.com/liliu-z/VectorDBBench/assets/105927039/8d65c8b4-b9a3-4405-9db8-d27bf1ffda4b)
+ ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/8c06512e-5419-4381-b084-9c93aed59639)
  ### Client
  Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant, and Weaviate. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
  ### Benchmark Cases
- We've developed an array of 12 comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into three main types:
+ We've developed an array of 9 comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into three main types:
  #### Capacity Case
  - **Large Dim:** Tests the database's loading capacity by inserting large-dimension vectors (GIST 100K vectors, 960 dimensions) until fully loaded. The final number of inserted vectors is reported.
  - **Small Dim:** Similar to the Large Dim case but uses small-dimension vectors (SIFT 100K vectors, 128 dimensions).
@@ -78,30 +102,24 @@ We've developed an array of 12 comprehensive benchmark cases to test vector data
  - **XLarge Dataset:** Measures search performance with a massive dataset (LAION 100M vectors, 768 dimensions) at varying parallel levels. The results include index building time, recall, latency, and maximum QPS.
  - **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (Cohere 10M vectors, 768 dimensions).
  - **Medium Dataset:** A case using a medium dataset (Cohere 1M vectors, 768 dimensions).
- - **Small Dataset:** This case uses a small dataset (Cohere 100K vectors, 768 dimensions).
  #### Filtering Search Performance Case
  - **Large Dataset, Low Filtering Rate:** Evaluates search performance with a large dataset (Cohere 10M vectors, 768 dimensions) under a low filtering rate (1% vectors) at different parallel levels.
  - **Medium Dataset, Low Filtering Rate:** This case uses a medium dataset (Cohere 1M vectors, 768 dimensions) with a similar low filtering rate.
- - **Small Dataset, Low Filtering Rate:** This case uses a small dataset (Cohere 100K vectors, 768 dimensions) with a low filtering rate.
  - **Large Dataset, High Filtering Rate:** It tests with a large dataset (Cohere 10M vectors, 768 dimensions) but under a high filtering rate (99% vectors).
  - **Medium Dataset, High Filtering Rate:** This case uses a medium dataset (Cohere 1M vectors, 768 dimensions) with a high filtering rate.
- - **Small Dataset, High Filtering Rate:** Finally, this case uses a small dataset (Cohere 100K vectors, 768 dimensions) under a high filtering rate.
  For a quick reference, here is a table summarizing the key aspects of each case:

- | Case No. | Case Type | Dataset Size | Dataset Type | Filtering Rate | Results |
- |----------|-----------|--------------|--------------|----------------|---------|
- | 1 | Capacity Case | Large Dim | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
- | 2 | Capacity Case | Small Dim | SIFT 100K vectors, 128 dimensions | N/A | Number of inserted vectors |
- | 3 | Search Performance Case | XLarge Dataset | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
- | 4 | Search Performance Case | Large Dataset | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
- | 5 | Search Performance Case | Medium Dataset | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
- | 6 | Search Performance Case | Small Dataset | Cohere 100K vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
- | 7 | Filtering Search Performance Case | Large Dataset, Low Filtering Rate | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
- | 8 | Filtering Search Performance Case | Medium Dataset, Low Filtering Rate | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
- | 9 | Filtering Search Performance Case | Small Dataset, Low Filtering Rate | Cohere 100K vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
- | 10 | Filtering Search Performance Case | Large Dataset, High Filtering Rate | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
- | 11 | Filtering Search Performance Case | Medium Dataset, High Filtering Rate | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
- | 12 | Filtering Search Performance Case | Small Dataset, High Filtering Rate | Cohere 100K vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
+ | Case No. | Case Type | Dataset Size | Filtering Rate | Results |
+ |----------|-----------|--------------|----------------|---------|
+ | 1 | Capacity Case | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
+ | 2 | Capacity Case | SIFT 100K vectors, 128 dimensions | N/A | Number of inserted vectors |
+ | 3 | Search Performance Case | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
+ | 4 | Search Performance Case | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
+ | 5 | Search Performance Case | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
+ | 6 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
+ | 7 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
+ | 8 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
+ | 9 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |

  Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
@@ -122,7 +140,7 @@ VectorDBBench aims to provide a more comprehensive, multi-faceted testing enviro

  1. Navigate to the vectordb_bench/backend/clients directory.
  2. Create a new folder for your client, for example, "new_client".
- 3. Inside the "new_client" folder, create two files: new_client.py and config.py.
+ 3. Inside the "new_client" folder, create two files: new_client.py and config.py.

  **Step 2: Implement new_client.py and config.py**
@@ -187,7 +205,66 @@ For the system under test, we use the default server-side configuration to maint
  For the Client, we welcome any parameter tuning to obtain better results.
  ### Incomplete Results
  Many databases may not be able to complete all test cases due to issues such as Out of Memory (OOM), crashes, or timeouts. In these scenarios, we will clearly state these occurrences in the test results.
- ### Unpublishable Results
- Although we are trying to support as many clients as possible for benchmarking, due to the restrictions imposed by the [Dewitt Clause](https://cube.dev/blog/dewitt-clause-or-can-you-benchmark-a-database), we're unable to publish all benchmark results. This means that users may not be able to fully compare performance data for certain databases on our platform, despite the support we have integrated for these systems.
  ### Mistake Or Misrepresentation
  We strive for accuracy in learning and supporting various vector databases, yet there might be oversights or misapplications. For any such occurrences, feel free to [raise an issue](https://github.com/zilliztech/VectorDBBench/issues/new) or make amendments on our GitHub page.
+ ## Timeout
+ In our pursuit to ensure that our benchmark reflects the reality of a production environment while guaranteeing the practicality of the system, we have implemented a timeout plan based on our experiences for various tests.
+
+ **1. Capacity Case:**
+ - For Capacity Case, we have assigned an overall timeout.
+
+ **2. Other Cases:**
+
+ For other cases, we have set two timeouts:
+
+ - **Data Loading Timeout:** This timeout is designed to filter out systems that are too slow in inserting data, thus ensuring that we are only considering systems that are able to cope with the demands of a real-world production environment within a reasonable time frame.
+
+ - **Optimization Preparation Timeout:** This timeout is established to avoid excessive optimization strategies that might work for benchmarks but fail to deliver in real production environments. By doing this, we ensure that the systems we consider are not only suitable for testing environments but also applicable and efficient in production scenarios.
+
+ This multi-tiered timeout approach allows our benchmark to be more representative of actual production environments and assists us in identifying systems that can truly perform in real-world scenarios.
+ <table>
+     <tr>
+         <th>Case</th>
+         <th>Data Size</th>
+         <th>Timeout Type</th>
+         <th>Value</th>
+     </tr>
+     <tr>
+         <td>Capacity Case</td>
+         <td>N/A</td>
+         <td>Loading timeout</td>
+         <td>24 hours</td>
+     </tr>
+     <tr>
+         <td rowspan="2">Other Cases</td>
+         <td rowspan="2">1M vectors, 768 dimensions</td>
+         <td>Loading timeout</td>
+         <td>2.5 hours</td>
+     </tr>
+     <tr>
+         <td>Optimization timeout</td>
+         <td>15 mins</td>
+     </tr>
+     <tr>
+         <td rowspan="2">Other Cases</td>
+         <td rowspan="2">10M vectors, 768 dimensions</td>
+         <td>Loading timeout</td>
+         <td>25 hours</td>
+     </tr>
+     <tr>
+         <td>Optimization timeout</td>
+         <td>2.5 hours</td>
+     </tr>
+     <tr>
+         <td rowspan="2">Other Cases</td>
+         <td rowspan="2">100M vectors, 768 dimensions</td>
+         <td>Loading timeout</td>
+         <td>250 hours</td>
+     </tr>
+     <tr>
+         <td>Optimization timeout</td>
+         <td>25 hours</td>
+     </tr>
+ </table>
+
+ **Note:** Some datapoints in the standard benchmark results that violate this timeout will be kept for now for reference. We will remove them in the future.
@@ -1,5 +1,5 @@
  [build-system]
- requires = ["setuptools>=67.0", "wheel"]
+ requires = ["setuptools>=67.0", "wheel", "setuptools_scm[toml]>=6.2"]
  build-backend = "setuptools.build_meta"

  [tool.setuptools.package-data]
@@ -37,8 +37,11 @@ dependencies = [
      "scikit-learn",
      "s3fs",
      "psutil",
+     "polars",
+     "pgvector",
+     "sqlalchemy"
  ]
- version = "0.0.1"
+ dynamic = ["version"]

  [project.optional-dependencies]
  test = [
@@ -51,3 +54,5 @@ test = [

  [project.scripts]
  init_bench = "vectordb_bench.__main__:main"
+
+ [tool.setuptools_scm]
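
> With `version = "0.0.1"` replaced by `dynamic = ["version"]` and the new `[tool.setuptools_scm]` table, the package version is now derived from Git tags at build time (which is why the workflow above fetches tags before building). A minimal sketch of reading that version at runtime, using only the standard library:

```python
# Minimal sketch: reading the SCM-derived version of an installed package.
# "vectordb-bench" is the distribution name from pyproject.toml.
from importlib.metadata import version, PackageNotFoundError

try:
    print(version("vectordb-bench"))  # e.g. "0.0.3"
except PackageNotFoundError:
    print("vectordb-bench is not installed")
```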
@@ -0,0 +1,4 @@
+ import sys
+
+ from os.path import dirname, abspath
+ sys.path.append(dirname(dirname(abspath(__file__))))
@@ -0,0 +1,4 @@
+ [pytest]
+
+ filterwarnings =
+     ignore::UserWarning
@@ -0,0 +1,36 @@
+ from vectordb_bench.backend.dataset import Dataset
+ import logging
+ import pytest
+ from pydantic import ValidationError
+
+
+ log = logging.getLogger("vectordb_bench")
+
+ class TestDataSet:
+     def test_iter_dataset(self):
+         for ds in Dataset:
+             log.info(ds)
+
+     def test_cohere(self):
+         cohere = Dataset.COHERE.get(100_000)
+         log.info(cohere)
+         assert cohere.name == "Cohere"
+         assert cohere.size == 100_000
+         assert cohere.label == "SMALL"
+         assert cohere.dim == 768
+
+     def test_cohere_error(self):
+         with pytest.raises(ValidationError):
+             Dataset.COHERE.get(9999)
+
+     def test_init_cohere(self):
+         coheres = [Dataset.COHERE.manager(i) for i in [100_000, 1_000_000, 10_000_000]]
+         for t in coheres:
+             t._validate_local_file()
+
+     def test_iter_cohere(self):
+         cohere_10m = Dataset.COHERE.manager(10_000_000)
+         cohere_10m.prepare(False)
+         for i in cohere_10m:
+             log.debug(i.head(1))
+
@@ -24,7 +24,7 @@ class TestModels:
      db=DB.Milvus,
      db_config=DB.Milvus.config(),
      db_case_config=DB.Milvus.case_config_cls(index=IndexType.Flat)(),
-     case_config=CaseConfig(case_id=CaseType.PerformanceLZero),
+     case_config=CaseConfig(case_id=CaseType.Performance10M),
      ),
      metrics=Metric(),
  )
@@ -65,6 +65,6 @@ class TestModels:

      def test_test_result_display(self):
          result_dir = config.RESULTS_LOCAL_DIR
-         for json_file in result_dir.glob("*.json"):
+         for json_file in result_dir.glob("result*.json"):
              res = TestResult.read_file(json_file)
              res.display()