vectordb-bench 0.0.6__tar.gz → 0.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. vectordb_bench-0.0.8/.devcontainer/Dockerfile +10 -0
  2. vectordb_bench-0.0.8/.devcontainer/devcontainer.json +47 -0
  3. vectordb_bench-0.0.8/.github/workflows/pull_request.yml +36 -0
  4. vectordb_bench-0.0.8/Dockerfile +18 -0
  5. vectordb_bench-0.0.8/Makefile +2 -0
  6. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/PKG-INFO +15 -2
  7. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/README.md +13 -0
  8. vectordb_bench-0.0.8/install/requirements_py3.11.txt +23 -0
  9. vectordb_bench-0.0.8/install.py +72 -0
  10. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/pyproject.toml +5 -1
  11. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_chroma.py +3 -16
  12. vectordb_bench-0.0.8/tests/test_data_source.py +28 -0
  13. vectordb_bench-0.0.8/tests/test_dataset.py +77 -0
  14. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_utils.py +27 -0
  15. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/__init__.py +4 -4
  16. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/api.py +1 -0
  17. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/chroma/chroma.py +2 -14
  18. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/milvus/config.py +19 -0
  19. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/pgvecto_rs/config.py +44 -32
  20. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +16 -16
  21. vectordb_bench-0.0.8/vectordb_bench/backend/clients/pgvector/config.py +100 -0
  22. vectordb_bench-0.0.8/vectordb_bench/backend/clients/pgvector/pgvector.py +187 -0
  23. vectordb_bench-0.0.8/vectordb_bench/backend/clients/qdrant_cloud/config.py +47 -0
  24. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +11 -7
  25. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/zilliz_cloud/config.py +4 -0
  26. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/data_source.py +13 -64
  27. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/dataset.py +45 -67
  28. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/runner/serial_runner.py +1 -1
  29. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/task_runner.py +2 -2
  30. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/utils.py +30 -0
  31. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/caseSelector.py +1 -1
  32. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/const/dbCaseConfigs.py +41 -77
  33. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/models.py +1 -0
  34. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +8 -0
  35. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +9 -3
  36. vectordb-bench-0.0.6/vectordb_bench/results/ZillizCloud/result_20240105_beta_202401_zillizcloud.json → vectordb_bench-0.0.8/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +365 -41
  37. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/getLeaderboardData.py +1 -1
  38. vectordb_bench-0.0.8/vectordb_bench/results/leaderboard.json +1 -0
  39. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/PKG-INFO +15 -2
  40. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/SOURCES.txt +8 -1
  41. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/requires.txt +1 -1
  42. vectordb-bench-0.0.6/tests/test_data_source.py +0 -78
  43. vectordb-bench-0.0.6/tests/test_dataset.py +0 -67
  44. vectordb-bench-0.0.6/vectordb_bench/backend/clients/pgvector/config.py +0 -49
  45. vectordb-bench-0.0.6/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -159
  46. vectordb-bench-0.0.6/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -34
  47. vectordb-bench-0.0.6/vectordb_bench/results/leaderboard.json +0 -1
  48. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/.env.example +0 -0
  49. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/.github/workflows/publish_package_on_release.yml +0 -0
  50. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/.gitignore +0 -0
  51. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/.ruff.toml +0 -0
  52. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/LICENSE +0 -0
  53. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/OWNERS +0 -0
  54. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/setup.cfg +0 -0
  55. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/conftest.py +0 -0
  56. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/pytest.ini +0 -0
  57. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_bench_runner.py +0 -0
  58. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_elasticsearch_cloud.py +0 -0
  59. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_models.py +0 -0
  60. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_redis.py +0 -0
  61. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/ut_cases.py +0 -0
  62. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/__init__.py +0 -0
  63. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/__main__.py +0 -0
  64. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/__init__.py +0 -0
  65. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/assembler.py +0 -0
  66. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/cases.py +0 -0
  67. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  68. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
  69. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
  70. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/milvus/milvus.py +0 -0
  71. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
  72. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
  73. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/redis/config.py +0 -0
  74. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/redis/redis.py +0 -0
  75. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
  76. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
  77. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  78. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/result_collector.py +0 -0
  79. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/runner/__init__.py +0 -0
  80. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/runner/mp_runner.py +0 -0
  81. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/base.py +0 -0
  82. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
  83. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/data.py +0 -0
  84. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
  85. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
  86. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  87. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
  88. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
  89. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
  90. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
  91. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  92. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
  93. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
  94. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
  95. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
  96. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  97. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/submitTask.py +0 -0
  98. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/const/dbPrices.py +0 -0
  99. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/const/styles.py +0 -0
  100. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/pages/quries_per_dollar.py +0 -0
  101. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/pages/run_test.py +0 -0
  102. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/utils.py +0 -0
  103. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/vdb_benchmark.py +0 -0
  104. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/interface.py +0 -0
  105. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/log_util.py +0 -0
  106. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/metric.py +0 -0
  107. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  108. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  109. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
  110. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
  111. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  112. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  113. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  114. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  115. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  116. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  117. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
  118. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
  119. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/dbPrices.json +0 -0
  120. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  121. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/entry_points.txt +0 -0
  122. {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -0,0 +1,10 @@
1
+ FROM python:3.11-buster as builder-image
2
+
3
+ RUN apt-get update
4
+
5
+ COPY ../install/requirements_py3.11.txt .
6
+ RUN pip3 install -U pip
7
+ RUN pip3 install --no-cache-dir -r requirements_py3.11.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
8
+
9
+ WORKDIR /opt/code
10
+ ENV PYTHONPATH /opt/code/VectorDBBench
@@ -0,0 +1,47 @@
1
+ // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2
+ // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile
3
+ {
4
+ "name": "VectorDBBench dev container",
5
+ "build": {
6
+ // Sets the run context to one level up instead of the .devcontainer folder.
7
+ "context": "..",
8
+ // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
9
+ "dockerfile": "./Dockerfile"
10
+ },
11
+ "runArgs": [
12
+ "--privileged",
13
+ "--cap-add=SYS_PTRACE"
14
+ ],
15
+ "mounts": [
16
+ // You have to make sure the source directory is available on your host file system.
17
+ "source=${localEnv:HOME}/vectordb_bench/dataset,target=/tmp/vectordb_bench/dataset,type=bind,consistency=cached"
18
+ ],
19
+ "workspaceMount": "source=${localWorkspaceFolder},target=/opt/code/VectorDBBench,type=bind,consistency=cached",
20
+ "workspaceFolder": "/opt/code/VectorDBBench",
21
+
22
+ // Features to add to the dev container. More info: https://containers.dev/features.
23
+ // "features": {},
24
+
25
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
26
+ "forwardPorts": [
27
+ 8501
28
+ ],
29
+
30
+ // Uncomment the next line to run commands after the container is created.
31
+ // "postCreateCommand": "cat /etc/os-release",
32
+
33
+ // Configure tool-specific properties.
34
+ "customizations": {
35
+ "vscode": {
36
+ "extensions": [
37
+ "eamodio.gitlens",
38
+ "ms-python.python",
39
+ "ms-python.debugpy",
40
+ "ms-azuretools.vscode-docker"
41
+ ]
42
+ }
43
+ }
44
+
45
+ // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
46
+ // "remoteUser": "devcontainer"
47
+ }
@@ -0,0 +1,36 @@
1
+ name: Test on pull request
2
+
3
+ on:
4
+ pull_request:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ build:
10
+ name: Run Python Tests
11
+ strategy:
12
+ matrix:
13
+ python-version: [3.11, 3.12]
14
+ os: [ubuntu-latest, windows-latest]
15
+ runs-on: ${{ matrix.os }}
16
+
17
+ steps:
18
+ - name: Checkout code
19
+ uses: actions/checkout@v4
20
+ - name: Setup Python ${{ matrix.python-version }}
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+
25
+ - name: Fetch tags
26
+ run: |
27
+ git fetch --prune --unshallow --tags
28
+
29
+ - name: Install dependencies
30
+ run: |
31
+ python -m pip install --upgrade pip
32
+ pip install -e ".[test]"
33
+
34
+ - name: Test with pytest
35
+ run: |
36
+ make unittest
@@ -0,0 +1,18 @@
1
+ FROM python:3.11-buster as builder-image
2
+
3
+ RUN apt-get update
4
+
5
+ COPY install/requirements_py3.11.txt .
6
+ RUN pip3 install -U pip
7
+ RUN pip3 install --no-cache-dir -r requirements_py3.11.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
8
+
9
+ FROM python:3.11-slim-buster
10
+
11
+ COPY --from=builder-image /usr/local/bin /usr/local/bin
12
+ COPY --from=builder-image /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
13
+
14
+ WORKDIR /opt/code
15
+ COPY . .
16
+ ENV PYTHONPATH /opt/code
17
+
18
+ ENTRYPOINT ["python3", "-m", "vectordb_bench"]
@@ -0,0 +1,2 @@
1
+ unittest:
2
+ PYTHONPATH=`pwd` python3 -m pytest tests/test_dataset.py::TestDataSet::test_download_small -svv
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vectordb-bench
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -49,7 +49,7 @@ Provides-Extra: elastic
49
49
  Requires-Dist: elasticsearch; extra == "elastic"
50
50
  Provides-Extra: pgvector
51
51
  Requires-Dist: pgvector; extra == "pgvector"
52
- Requires-Dist: sqlalchemy; extra == "pgvector"
52
+ Requires-Dist: psycopg2; extra == "pgvector"
53
53
  Provides-Extra: pgvecto-rs
54
54
  Requires-Dist: psycopg2; extra == "pgvecto-rs"
55
55
  Provides-Extra: redis
@@ -149,6 +149,19 @@ OR:
149
149
  ```shell
150
150
  $ init_bench
151
151
  ```
152
+
153
+ OR:
154
+
155
+ If you are using a [dev container](https://code.visualstudio.com/docs/devcontainers/containers), create
156
+ the following dataset directory first:
157
+
158
+ ```shell
159
+ # Mount local ~/vectordb_bench/dataset to the container's /tmp/vectordb_bench/dataset.
160
+ # If you are not comfortable with the path name, feel free to change it in devcontainer.json
161
+ mkdir -p ~/vectordb_bench/dataset
162
+ ```
163
+ After reopening the repository in the container, run `python -m vectordb_bench` in the container's bash shell.
164
+
152
165
  ### Check coding styles
153
166
  ```shell
154
167
  $ ruff check vectordb_bench
@@ -90,6 +90,19 @@ OR:
90
90
  ```shell
91
91
  $ init_bench
92
92
  ```
93
+
94
+ OR:
95
+
96
+ If you are using a [dev container](https://code.visualstudio.com/docs/devcontainers/containers), create
97
+ the following dataset directory first:
98
+
99
+ ```shell
100
+ # Mount local ~/vectordb_bench/dataset to the container's /tmp/vectordb_bench/dataset.
101
+ # If you are not comfortable with the path name, feel free to change it in devcontainer.json
102
+ mkdir -p ~/vectordb_bench/dataset
103
+ ```
104
+ After reopening the repository in the container, run `python -m vectordb_bench` in the container's bash shell.
105
+
93
106
  ### Check coding styles
94
107
  ```shell
95
108
  $ ruff check vectordb_bench
@@ -0,0 +1,23 @@
1
+ grpcio==1.53.0
2
+ grpcio-tools==1.53.0
3
+ qdrant-client
4
+ pinecone-client
5
+ weaviate-client
6
+ elasticsearch
7
+ pgvector
8
+ sqlalchemy
9
+ redis
10
+ chromadb
11
+ pytz
12
+ streamlit-autorefresh
13
+ streamlit>=1.23.0
14
+ streamlit_extras
15
+ tqdm
16
+ s3fs
17
+ psutil
18
+ polars
19
+ plotly
20
+ environs
21
+ pydantic<v2
22
+ scikit-learn
23
+ pymilvus
@@ -0,0 +1,72 @@
1
+ import os
2
+ import argparse
3
+ import subprocess
4
+
5
+ def docker_tag_base():
6
+ return 'vdbbench'
7
+
8
+ def dockerfile_path_base():
9
+ return os.path.join('vectordb_bench/', '../Dockerfile')
10
+
11
+ def docker_tag(track, algo):
12
+ return docker_tag_base() + '-' + track + '-' + algo
13
+
14
+
15
+ def build(tag, args, dockerfile):
16
+ print('Building %s...' % tag)
17
+ if args is not None and len(args) != 0:
18
+ q = " ".join(["--build-arg " + x.replace(" ", "\\ ") for x in args])
19
+ else:
20
+ q = ""
21
+
22
+ try:
23
+ command = 'docker build %s --rm -t %s -f' \
24
+ % (q, tag)
25
+ command += ' %s .' % dockerfile
26
+ print(command)
27
+ subprocess.check_call(command, shell=True)
28
+ return {tag: 'success'}
29
+ except subprocess.CalledProcessError:
30
+ return {tag: 'fail'}
31
+
32
+ def build_multiprocess(args):
33
+ return build(*args)
34
+
35
+
36
+ if __name__ == "__main__":
37
+ parser = argparse.ArgumentParser(
38
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
39
+ parser.add_argument(
40
+ "--proc",
41
+ default=1,
42
+ type=int,
43
+ help="the number of process to build docker images")
44
+ parser.add_argument(
45
+ '--track',
46
+ choices=['none'],
47
+ default='none'
48
+ )
49
+ parser.add_argument(
50
+ '--algorithm',
51
+ metavar='NAME',
52
+ help='build only the named algorithm image',
53
+ default=None)
54
+ parser.add_argument(
55
+ '--dockerfile',
56
+ metavar='PATH',
57
+ help='build only the image from a Dockerfile path',
58
+ default=None)
59
+ parser.add_argument(
60
+ '--build-arg',
61
+ help='pass given args to all docker builds',
62
+ nargs="+")
63
+ args = parser.parse_args()
64
+
65
+ print('Building base image...')
66
+
67
+ subprocess.check_call(
68
+ 'docker build \
69
+ --rm -t %s -f %s .' % (docker_tag_base(), dockerfile_path_base()), shell=True)
70
+
71
+ print('Building end.')
72
+
@@ -5,6 +5,10 @@ build-backend = "setuptools.build_meta"
5
5
  [tool.setuptools.package-data]
6
6
  "vectordb_bench.results" = ["*.json"]
7
7
 
8
+ [tool.setuptools.packages.find]
9
+ where = ["."]
10
+ include = ["vectordb_bench"]
11
+
8
12
  [project]
9
13
  name = "vectordb-bench"
10
14
  authors = [
@@ -61,7 +65,7 @@ qdrant = [ "qdrant-client" ]
61
65
  pinecone = [ "pinecone-client" ]
62
66
  weaviate = [ "weaviate-client" ]
63
67
  elastic = [ "elasticsearch" ]
64
- pgvector = [ "pgvector", "sqlalchemy" ]
68
+ pgvector = [ "pgvector", "psycopg2" ]
65
69
  pgvecto_rs = [ "psycopg2" ]
66
70
  redis = [ "redis" ]
67
71
  chromadb = [ "chromadb" ]
@@ -52,6 +52,7 @@ class TestChroma:
52
52
 
53
53
  # insert
54
54
  with chrma.init():
55
+ #chrma.client.delete_collection("example2")
55
56
  assert (chrma.client.heartbeat() is not None), "chroma client is not connected"
56
57
  res = chrma.insert_embeddings(embeddings=embeddings, metadata=range(count))
57
58
  # bulk_insert return
@@ -87,7 +88,7 @@ class TestChroma:
87
88
 
88
89
 
89
90
  res = chrma.search_embedding(
90
- query=q, k=100, filters={"metadata": filter_value}
91
+ query=q, k=100, filters={"id": filter_value}
91
92
  )
92
93
  assert (
93
94
  res[0] == int(test_id)
@@ -101,18 +102,4 @@ class TestChroma:
101
102
  break
102
103
  assert isFilter, f"Filter not working, id_list: {id_list}"
103
104
 
104
- #Test id filter
105
- res = chrma.search_embedding(
106
- query=q, k=100, filters={"id": 9999}
107
- )
108
- assert (
109
- res[0] == 9999
110
- )
111
-
112
- #Test two filters, id and metadata
113
- res = chrma.search_embedding(
114
- query=q, k=100, filters={"metadata": filter_value, "id": 9999}
115
- )
116
- assert (
117
- res[0] == 9999 and len(res) == 1, f"filters failed, got: ({res[0]}), expected ({9999})"
118
- )
105
+
@@ -0,0 +1,28 @@
1
+ import logging
2
+ import pytest
3
+ from vectordb_bench.backend.data_source import DatasetSource
4
+ from vectordb_bench.backend.cases import type2case
5
+
6
+ log = logging.getLogger("vectordb_bench")
7
+
8
+ class TestReader:
9
+ @pytest.mark.parametrize("type_case", [
10
+ (k, v) for k, v in type2case.items()
11
+ ])
12
+ def test_type_cases(self, type_case):
13
+ self.per_case_test(type_case)
14
+
15
+
16
+ def per_case_test(self, type_case):
17
+ t, ca_cls = type_case
18
+ ca = ca_cls()
19
+ log.info(f"test case: {t.name}, {ca.name}")
20
+
21
+ filters = ca.filter_rate
22
+ ca.dataset.prepare(source=DatasetSource.AliyunOSS, filters=filters)
23
+ ali_trains = ca.dataset.train_files
24
+
25
+ ca.dataset.prepare(filters=filters)
26
+ s3_trains = ca.dataset.train_files
27
+
28
+ assert ali_trains == s3_trains
@@ -0,0 +1,77 @@
1
+ from vectordb_bench.backend.dataset import Dataset
2
+ import logging
3
+ import pytest
4
+ from pydantic import ValidationError
5
+ from vectordb_bench.backend.data_source import DatasetSource
6
+
7
+
8
+ log = logging.getLogger("vectordb_bench")
9
+
10
+ class TestDataSet:
11
+ def test_iter_dataset(self):
12
+ for ds in Dataset:
13
+ log.info(ds)
14
+
15
+ def test_cohere(self):
16
+ cohere = Dataset.COHERE.get(100_000)
17
+ log.info(cohere)
18
+ assert cohere.name == "Cohere"
19
+ assert cohere.size == 100_000
20
+ assert cohere.label == "SMALL"
21
+ assert cohere.dim == 768
22
+
23
+ def test_cohere_error(self):
24
+ with pytest.raises(ValidationError):
25
+ Dataset.COHERE.get(9999)
26
+
27
+ def test_iter_cohere(self):
28
+ cohere_10m = Dataset.COHERE.manager(10_000_000)
29
+ cohere_10m.prepare()
30
+
31
+ import time
32
+ before = time.time()
33
+ for i in cohere_10m:
34
+ log.debug(i.head(1))
35
+
36
+ dur_iter = time.time() - before
37
+ log.warning(f"iter through cohere_10m cost={dur_iter/60}min")
38
+
39
+ # pytest -sv tests/test_dataset.py::TestDataSet::test_iter_laion
40
+ def test_iter_laion(self):
41
+ laion_100m = Dataset.LAION.manager(100_000_000)
42
+ from vectordb_bench.backend.data_source import DatasetSource
43
+ laion_100m.prepare(source=DatasetSource.AliyunOSS)
44
+
45
+ import time
46
+ before = time.time()
47
+ for i in laion_100m:
48
+ log.debug(i.head(1))
49
+
50
+ dur_iter = time.time() - before
51
+ log.warning(f"iter through laion_100m cost={dur_iter/60}min")
52
+
53
+ def test_download_small(self):
54
+ openai_50k = Dataset.OPENAI.manager(50_000)
55
+ files = [
56
+ "test.parquet",
57
+ "neighbors.parquet",
58
+ "neighbors_head_1p.parquet",
59
+ "neighbors_tail_1p.parquet",
60
+ ]
61
+
62
+ file_path = openai_50k.data_dir.joinpath("test.parquet")
63
+ import os
64
+
65
+ DatasetSource.S3.reader().read(
66
+ openai_50k.data.dir_name.lower(),
67
+ files=files,
68
+ local_ds_root=openai_50k.data_dir,
69
+ )
70
+
71
+ os.remove(file_path)
72
+ DatasetSource.AliyunOSS.reader().read(
73
+ openai_50k.data.dir_name.lower(),
74
+ files=files,
75
+ local_ds_root=openai_50k.data_dir,
76
+ )
77
+
@@ -37,3 +37,30 @@ class TestUtils:
37
37
  log.info(f"recall: {res}, expected: {expected}")
38
38
  assert res == expected
39
39
 
40
+
41
+ class TestGetFiles:
42
+ @pytest.mark.parametrize("train_count", [
43
+ 1,
44
+ 10,
45
+ 50,
46
+ 100,
47
+ ])
48
+ def test_train_count(self, train_count):
49
+ files = utils.compose_train_files(train_count, True)
50
+ log.info(files)
51
+
52
+ assert len(files) == train_count
53
+
54
+ @pytest.mark.parametrize("use_shuffled", [True, False])
55
+ def test_use_shuffled(self, use_shuffled):
56
+ files = utils.compose_train_files(1, use_shuffled)
57
+ log.info(files)
58
+
59
+ trains = [f for f in files if "train" in f]
60
+ if use_shuffled:
61
+ for t in trains:
62
+ assert "shuffle_train" in t
63
+ else:
64
+ for t in trains:
65
+ assert "shuffle" not in t
66
+ assert "train" in t
@@ -54,8 +54,8 @@ class DB(Enum):
54
54
  return ElasticCloud
55
55
 
56
56
  if self == DB.QdrantCloud:
57
- from .qdrant_cloud.qdrant_cloud import QdrantClient
58
- return QdrantClient
57
+ from .qdrant_cloud.qdrant_cloud import QdrantCloud
58
+ return QdrantCloud
59
59
 
60
60
  if self == DB.WeaviateCloud:
61
61
  from .weaviate_cloud.weaviate_cloud import WeaviateCloud
@@ -142,8 +142,8 @@ class DB(Enum):
142
142
  return WeaviateIndexConfig
143
143
 
144
144
  if self == DB.PgVector:
145
- from .pgvector.config import PgVectorIndexConfig
146
- return PgVectorIndexConfig
145
+ from .pgvector.config import _pgvector_case_config
146
+ return _pgvector_case_config.get(index_type)
147
147
 
148
148
  if self == DB.PgVectoRS:
149
149
  from .pgvecto_rs.config import _pgvecto_rs_case_config
@@ -16,6 +16,7 @@ class IndexType(str, Enum):
16
16
  HNSW = "HNSW"
17
17
  DISKANN = "DISKANN"
18
18
  IVFFlat = "IVF_FLAT"
19
+ IVFSQ8 = "IVF_SQ8"
19
20
  Flat = "FLAT"
20
21
  AUTOINDEX = "AUTOINDEX"
21
22
  ES_HNSW = "hnsw"
@@ -106,21 +106,9 @@ class ChromaClient(VectorDB):
106
106
  """
107
107
  if filters:
108
108
  # assumes benchmark test filters of format: {'metadata': '>=10000', 'id': 10000}
109
- metadata_value = filters.get("metadata")
110
109
  id_value = filters.get("id")
111
- if metadata_value and id_value:
112
- results = self.collection.query(
113
- query_embeddings=query, n_results=k,
114
- where={"$and": [{"id": {"$eq": id_value}},
115
- {"id": {"$gt": metadata_value}}
116
- ]}
117
- )
118
- elif metadata_value:
119
- results = self.collection.query(query_embeddings=query, n_results=k,
120
- where={"id": {"$gt": metadata_value}})
121
- else:
122
- results = self.collection.query(query_embeddings=query, n_results=k,
123
- where={"id": {"$eq": id_value}})
110
+ results = self.collection.query(query_embeddings=query, n_results=k,
111
+ where={"id": {"$gt": id_value}})
124
112
  #return list of id's in results
125
113
  return [int(i) for i in results.get('ids')[0]]
126
114
  results = self.collection.query(query_embeddings=query, n_results=k)
@@ -95,6 +95,24 @@ class IVFFlatConfig(MilvusIndexConfig, DBCaseConfig):
95
95
  "metric_type": self.parse_metric(),
96
96
  "params": {"nprobe": self.nprobe},
97
97
  }
98
+
99
+ class IVFSQ8Config(MilvusIndexConfig, DBCaseConfig):
100
+ nlist: int
101
+ nprobe: int | None = None
102
+ index: IndexType = IndexType.IVFSQ8
103
+
104
+ def index_param(self) -> dict:
105
+ return {
106
+ "metric_type": self.parse_metric(),
107
+ "index_type": self.index.value,
108
+ "params": {"nlist": self.nlist},
109
+ }
110
+
111
+ def search_param(self) -> dict:
112
+ return {
113
+ "metric_type": self.parse_metric(),
114
+ "params": {"nprobe": self.nprobe},
115
+ }
98
116
 
99
117
 
100
118
  class FLATConfig(MilvusIndexConfig, DBCaseConfig):
@@ -210,6 +228,7 @@ _milvus_case_config = {
210
228
  IndexType.HNSW: HNSWConfig,
211
229
  IndexType.DISKANN: DISKANNConfig,
212
230
  IndexType.IVFFlat: IVFFlatConfig,
231
+ IndexType.IVFSQ8: IVFSQ8Config,
213
232
  IndexType.Flat: FLATConfig,
214
233
  IndexType.GPU_IVF_FLAT: GPUIVFFlatConfig,
215
234
  IndexType.GPU_IVF_PQ: GPUIVFPQConfig,