vectordb-bench 0.0.6__tar.gz → 0.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench-0.0.8/.devcontainer/Dockerfile +10 -0
- vectordb_bench-0.0.8/.devcontainer/devcontainer.json +47 -0
- vectordb_bench-0.0.8/.github/workflows/pull_request.yml +36 -0
- vectordb_bench-0.0.8/Dockerfile +18 -0
- vectordb_bench-0.0.8/Makefile +2 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/PKG-INFO +15 -2
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/README.md +13 -0
- vectordb_bench-0.0.8/install/requirements_py3.11.txt +23 -0
- vectordb_bench-0.0.8/install.py +72 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/pyproject.toml +5 -1
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_chroma.py +3 -16
- vectordb_bench-0.0.8/tests/test_data_source.py +28 -0
- vectordb_bench-0.0.8/tests/test_dataset.py +77 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_utils.py +27 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/__init__.py +4 -4
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/api.py +1 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/chroma/chroma.py +2 -14
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/milvus/config.py +19 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/pgvecto_rs/config.py +44 -32
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +16 -16
- vectordb_bench-0.0.8/vectordb_bench/backend/clients/pgvector/config.py +100 -0
- vectordb_bench-0.0.8/vectordb_bench/backend/clients/pgvector/pgvector.py +187 -0
- vectordb_bench-0.0.8/vectordb_bench/backend/clients/qdrant_cloud/config.py +47 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +11 -7
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/zilliz_cloud/config.py +4 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/data_source.py +13 -64
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/dataset.py +45 -67
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/runner/serial_runner.py +1 -1
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/task_runner.py +2 -2
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/utils.py +30 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/caseSelector.py +1 -1
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/const/dbCaseConfigs.py +41 -77
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/models.py +1 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +8 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +9 -3
- vectordb-bench-0.0.6/vectordb_bench/results/ZillizCloud/result_20240105_beta_202401_zillizcloud.json → vectordb_bench-0.0.8/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +365 -41
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/getLeaderboardData.py +1 -1
- vectordb_bench-0.0.8/vectordb_bench/results/leaderboard.json +1 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/PKG-INFO +15 -2
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/SOURCES.txt +8 -1
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/requires.txt +1 -1
- vectordb-bench-0.0.6/tests/test_data_source.py +0 -78
- vectordb-bench-0.0.6/tests/test_dataset.py +0 -67
- vectordb-bench-0.0.6/vectordb_bench/backend/clients/pgvector/config.py +0 -49
- vectordb-bench-0.0.6/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -159
- vectordb-bench-0.0.6/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -34
- vectordb-bench-0.0.6/vectordb_bench/results/leaderboard.json +0 -1
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/.env.example +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/.github/workflows/publish_package_on_release.yml +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/.gitignore +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/.ruff.toml +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/LICENSE +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/OWNERS +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/setup.cfg +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/conftest.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/pytest.ini +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_bench_runner.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_elasticsearch_cloud.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_models.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/test_redis.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/tests/ut_cases.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/__init__.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/__main__.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/__init__.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/assembler.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/cases.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/chroma/config.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/milvus/milvus.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/redis/config.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/redis/redis.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/result_collector.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/runner/__init__.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/runner/mp_runner.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/base.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/data.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/components/run_test/submitTask.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/const/dbPrices.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/const/styles.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/pages/quries_per_dollar.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/pages/run_test.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/utils.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/frontend/vdb_benchmark.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/interface.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/log_util.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/metric.py +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/results/dbPrices.json +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/dependency_links.txt +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/entry_points.txt +0 -0
- {vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -0,0 +1,10 @@
|
|
1
|
+
FROM python:3.11-buster as builder-image
|
2
|
+
|
3
|
+
RUN apt-get update
|
4
|
+
|
5
|
+
COPY ../install/requirements_py3.11.txt .
|
6
|
+
RUN pip3 install -U pip
|
7
|
+
RUN pip3 install --no-cache-dir -r requirements_py3.11.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
|
8
|
+
|
9
|
+
WORKDIR /opt/code
|
10
|
+
ENV PYTHONPATH /opt/code/VectorDBBench
|
@@ -0,0 +1,47 @@
|
|
1
|
+
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
2
|
+
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile
|
3
|
+
{
|
4
|
+
"name": "VectorDBBench dev container",
|
5
|
+
"build": {
|
6
|
+
// Sets the run context to one level up instead of the .devcontainer folder.
|
7
|
+
"context": "..",
|
8
|
+
// Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
|
9
|
+
"dockerfile": "./Dockerfile"
|
10
|
+
},
|
11
|
+
"runArgs": [
|
12
|
+
"--privileged",
|
13
|
+
"--cap-add=SYS_PTRACE"
|
14
|
+
],
|
15
|
+
"mounts": [
|
16
|
+
// You have to make sure the source directory is available on your host file system.
|
17
|
+
"source=${localEnv:HOME}/vectordb_bench/dataset,target=/tmp/vectordb_bench/dataset,type=bind,consistency=cached"
|
18
|
+
],
|
19
|
+
"workspaceMount": "source=${localWorkspaceFolder},target=/opt/code/VectorDBBench,type=bind,consistency=cached",
|
20
|
+
"workspaceFolder": "/opt/code/VectorDBBench",
|
21
|
+
|
22
|
+
// Features to add to the dev container. More info: https://containers.dev/features.
|
23
|
+
// "features": {},
|
24
|
+
|
25
|
+
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
26
|
+
"forwardPorts": [
|
27
|
+
8501
|
28
|
+
],
|
29
|
+
|
30
|
+
// Uncomment the next line to run commands after the container is created.
|
31
|
+
// "postCreateCommand": "cat /etc/os-release",
|
32
|
+
|
33
|
+
// Configure tool-specific properties.
|
34
|
+
"customizations": {
|
35
|
+
"vscode": {
|
36
|
+
"extensions": [
|
37
|
+
"eamodio.gitlens",
|
38
|
+
"ms-python.python",
|
39
|
+
"ms-python.debugpy",
|
40
|
+
"ms-azuretools.vscode-docker"
|
41
|
+
]
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
|
46
|
+
// "remoteUser": "devcontainer"
|
47
|
+
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
name: Test on pull request
|
2
|
+
|
3
|
+
on:
|
4
|
+
pull_request:
|
5
|
+
branches:
|
6
|
+
- main
|
7
|
+
|
8
|
+
jobs:
|
9
|
+
build:
|
10
|
+
name: Run Python Tests
|
11
|
+
strategy:
|
12
|
+
matrix:
|
13
|
+
python-version: [3.11, 3.12]
|
14
|
+
os: [ubuntu-latest, windows-latest]
|
15
|
+
runs-on: ${{ matrix.os }}
|
16
|
+
|
17
|
+
steps:
|
18
|
+
- name: Checkout code
|
19
|
+
uses: actions/checkout@v4
|
20
|
+
- name: Setup Python ${{ matrix.python-version }}
|
21
|
+
uses: actions/setup-python@v5
|
22
|
+
with:
|
23
|
+
python-version: ${{ matrix.python-version }}
|
24
|
+
|
25
|
+
- name: Fetch tags
|
26
|
+
run: |
|
27
|
+
git fetch --prune --unshallow --tags
|
28
|
+
|
29
|
+
- name: Install dependencies
|
30
|
+
run: |
|
31
|
+
python -m pip install --upgrade pip
|
32
|
+
pip install -e ".[test]"
|
33
|
+
|
34
|
+
- name: Test with pytest
|
35
|
+
run: |
|
36
|
+
make unittest
|
@@ -0,0 +1,18 @@
|
|
1
|
+
FROM python:3.11-buster as builder-image
|
2
|
+
|
3
|
+
RUN apt-get update
|
4
|
+
|
5
|
+
COPY install/requirements_py3.11.txt .
|
6
|
+
RUN pip3 install -U pip
|
7
|
+
RUN pip3 install --no-cache-dir -r requirements_py3.11.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
|
8
|
+
|
9
|
+
FROM python:3.11-slim-buster
|
10
|
+
|
11
|
+
COPY --from=builder-image /usr/local/bin /usr/local/bin
|
12
|
+
COPY --from=builder-image /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
|
13
|
+
|
14
|
+
WORKDIR /opt/code
|
15
|
+
COPY . .
|
16
|
+
ENV PYTHONPATH /opt/code
|
17
|
+
|
18
|
+
ENTRYPOINT ["python3", "-m", "vectordb_bench"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.6
|
3
|
+
Version: 0.0.8
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -49,7 +49,7 @@ Provides-Extra: elastic
|
|
49
49
|
Requires-Dist: elasticsearch; extra == "elastic"
|
50
50
|
Provides-Extra: pgvector
|
51
51
|
Requires-Dist: pgvector; extra == "pgvector"
|
52
|
-
Requires-Dist:
|
52
|
+
Requires-Dist: psycopg2; extra == "pgvector"
|
53
53
|
Provides-Extra: pgvecto-rs
|
54
54
|
Requires-Dist: psycopg2; extra == "pgvecto-rs"
|
55
55
|
Provides-Extra: redis
|
@@ -149,6 +149,19 @@ OR:
|
|
149
149
|
```shell
|
150
150
|
$ init_bench
|
151
151
|
```
|
152
|
+
|
153
|
+
OR:
|
154
|
+
|
155
|
+
If you are using [dev container](https://code.visualstudio.com/docs/devcontainers/containers), create
|
156
|
+
the following dataset directory first:
|
157
|
+
|
158
|
+
```shell
|
159
|
+
# Mount local ~/vectordb_bench/dataset to container's /tmp/vectordb_bench/dataset.
|
160
|
+
# If you are not comfortable with the path name, feel free to change it in devcontainer.json
|
161
|
+
mkdir -p ~/vectordb_bench/dataset
|
162
|
+
```
|
163
|
+
After reopening the repository in the container, run `python -m vectordb_bench` in the container's bash.
|
164
|
+
|
152
165
|
### Check coding styles
|
153
166
|
```shell
|
154
167
|
$ ruff check vectordb_bench
|
@@ -90,6 +90,19 @@ OR:
|
|
90
90
|
```shell
|
91
91
|
$ init_bench
|
92
92
|
```
|
93
|
+
|
94
|
+
OR:
|
95
|
+
|
96
|
+
If you are using [dev container](https://code.visualstudio.com/docs/devcontainers/containers), create
|
97
|
+
the following dataset directory first:
|
98
|
+
|
99
|
+
```shell
|
100
|
+
# Mount local ~/vectordb_bench/dataset to container's /tmp/vectordb_bench/dataset.
|
101
|
+
# If you are not comfortable with the path name, feel free to change it in devcontainer.json
|
102
|
+
mkdir -p ~/vectordb_bench/dataset
|
103
|
+
```
|
104
|
+
After reopening the repository in the container, run `python -m vectordb_bench` in the container's bash.
|
105
|
+
|
93
106
|
### Check coding styles
|
94
107
|
```shell
|
95
108
|
$ ruff check vectordb_bench
|
@@ -0,0 +1,23 @@
|
|
1
|
+
grpcio==1.53.0
|
2
|
+
grpcio-tools==1.53.0
|
3
|
+
qdrant-client
|
4
|
+
pinecone-client
|
5
|
+
weaviate-client
|
6
|
+
elasticsearch
|
7
|
+
pgvector
|
8
|
+
sqlalchemy
|
9
|
+
redis
|
10
|
+
chromadb
|
11
|
+
pytz
|
12
|
+
streamlit-autorefresh
|
13
|
+
streamlit>=1.23.0
|
14
|
+
streamlit_extras
|
15
|
+
tqdm
|
16
|
+
s3fs
|
17
|
+
psutil
|
18
|
+
polars
|
19
|
+
plotly
|
20
|
+
environs
|
21
|
+
pydantic<v2
|
22
|
+
scikit-learn
|
23
|
+
pymilvus
|
@@ -0,0 +1,72 @@
|
|
1
|
+
import os
|
2
|
+
import argparse
|
3
|
+
import subprocess
|
4
|
+
|
5
|
+
def docker_tag_base():
|
6
|
+
return 'vdbbench'
|
7
|
+
|
8
|
+
def dockerfile_path_base():
|
9
|
+
return os.path.join('vectordb_bench/', '../Dockerfile')
|
10
|
+
|
11
|
+
def docker_tag(track, algo):
|
12
|
+
return docker_tag_base() + '-' + track + '-' + algo
|
13
|
+
|
14
|
+
|
15
|
+
def build(tag, args, dockerfile):
|
16
|
+
print('Building %s...' % tag)
|
17
|
+
if args is not None and len(args) != 0:
|
18
|
+
q = " ".join(["--build-arg " + x.replace(" ", "\\ ") for x in args])
|
19
|
+
else:
|
20
|
+
q = ""
|
21
|
+
|
22
|
+
try:
|
23
|
+
command = 'docker build %s --rm -t %s -f' \
|
24
|
+
% (q, tag)
|
25
|
+
command += ' %s .' % dockerfile
|
26
|
+
print(command)
|
27
|
+
subprocess.check_call(command, shell=True)
|
28
|
+
return {tag: 'success'}
|
29
|
+
except subprocess.CalledProcessError:
|
30
|
+
return {tag: 'fail'}
|
31
|
+
|
32
|
+
def build_multiprocess(args):
|
33
|
+
return build(*args)
|
34
|
+
|
35
|
+
|
36
|
+
if __name__ == "__main__":
|
37
|
+
parser = argparse.ArgumentParser(
|
38
|
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
39
|
+
parser.add_argument(
|
40
|
+
"--proc",
|
41
|
+
default=1,
|
42
|
+
type=int,
|
43
|
+
help="the number of process to build docker images")
|
44
|
+
parser.add_argument(
|
45
|
+
'--track',
|
46
|
+
choices=['none'],
|
47
|
+
default='none'
|
48
|
+
)
|
49
|
+
parser.add_argument(
|
50
|
+
'--algorithm',
|
51
|
+
metavar='NAME',
|
52
|
+
help='build only the named algorithm image',
|
53
|
+
default=None)
|
54
|
+
parser.add_argument(
|
55
|
+
'--dockerfile',
|
56
|
+
metavar='PATH',
|
57
|
+
help='build only the image from a Dockerfile path',
|
58
|
+
default=None)
|
59
|
+
parser.add_argument(
|
60
|
+
'--build-arg',
|
61
|
+
help='pass given args to all docker builds',
|
62
|
+
nargs="+")
|
63
|
+
args = parser.parse_args()
|
64
|
+
|
65
|
+
print('Building base image...')
|
66
|
+
|
67
|
+
subprocess.check_call(
|
68
|
+
'docker build \
|
69
|
+
--rm -t %s -f %s .' % (docker_tag_base(), dockerfile_path_base()), shell=True)
|
70
|
+
|
71
|
+
print('Building end.')
|
72
|
+
|
@@ -5,6 +5,10 @@ build-backend = "setuptools.build_meta"
|
|
5
5
|
[tool.setuptools.package-data]
|
6
6
|
"vectordb_bench.results" = ["*.json"]
|
7
7
|
|
8
|
+
[tool.setuptools.packages.find]
|
9
|
+
where = ["."]
|
10
|
+
include = ["vectordb_bench"]
|
11
|
+
|
8
12
|
[project]
|
9
13
|
name = "vectordb-bench"
|
10
14
|
authors = [
|
@@ -61,7 +65,7 @@ qdrant = [ "qdrant-client" ]
|
|
61
65
|
pinecone = [ "pinecone-client" ]
|
62
66
|
weaviate = [ "weaviate-client" ]
|
63
67
|
elastic = [ "elasticsearch" ]
|
64
|
-
pgvector = [ "pgvector", "
|
68
|
+
pgvector = [ "pgvector", "psycopg2" ]
|
65
69
|
pgvecto_rs = [ "psycopg2" ]
|
66
70
|
redis = [ "redis" ]
|
67
71
|
chromadb = [ "chromadb" ]
|
@@ -52,6 +52,7 @@ class TestChroma:
|
|
52
52
|
|
53
53
|
# insert
|
54
54
|
with chrma.init():
|
55
|
+
#chrma.client.delete_collection("example2")
|
55
56
|
assert (chrma.client.heartbeat() is not None), "chroma client is not connected"
|
56
57
|
res = chrma.insert_embeddings(embeddings=embeddings, metadata=range(count))
|
57
58
|
# bulk_insert return
|
@@ -87,7 +88,7 @@ class TestChroma:
|
|
87
88
|
|
88
89
|
|
89
90
|
res = chrma.search_embedding(
|
90
|
-
query=q, k=100, filters={"
|
91
|
+
query=q, k=100, filters={"id": filter_value}
|
91
92
|
)
|
92
93
|
assert (
|
93
94
|
res[0] == int(test_id)
|
@@ -101,18 +102,4 @@ class TestChroma:
|
|
101
102
|
break
|
102
103
|
assert isFilter, f"Filter not working, id_list: {id_list}"
|
103
104
|
|
104
|
-
|
105
|
-
res = chrma.search_embedding(
|
106
|
-
query=q, k=100, filters={"id": 9999}
|
107
|
-
)
|
108
|
-
assert (
|
109
|
-
res[0] == 9999
|
110
|
-
)
|
111
|
-
|
112
|
-
#Test two filters, id and metadata
|
113
|
-
res = chrma.search_embedding(
|
114
|
-
query=q, k=100, filters={"metadata": filter_value, "id": 9999}
|
115
|
-
)
|
116
|
-
assert (
|
117
|
-
res[0] == 9999 and len(res) == 1, f"filters failed, got: ({res[0]}), expected ({9999})"
|
118
|
-
)
|
105
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
import logging
|
2
|
+
import pytest
|
3
|
+
from vectordb_bench.backend.data_source import DatasetSource
|
4
|
+
from vectordb_bench.backend.cases import type2case
|
5
|
+
|
6
|
+
log = logging.getLogger("vectordb_bench")
|
7
|
+
|
8
|
+
class TestReader:
|
9
|
+
@pytest.mark.parametrize("type_case", [
|
10
|
+
(k, v) for k, v in type2case.items()
|
11
|
+
])
|
12
|
+
def test_type_cases(self, type_case):
|
13
|
+
self.per_case_test(type_case)
|
14
|
+
|
15
|
+
|
16
|
+
def per_case_test(self, type_case):
|
17
|
+
t, ca_cls = type_case
|
18
|
+
ca = ca_cls()
|
19
|
+
log.info(f"test case: {t.name}, {ca.name}")
|
20
|
+
|
21
|
+
filters = ca.filter_rate
|
22
|
+
ca.dataset.prepare(source=DatasetSource.AliyunOSS, filters=filters)
|
23
|
+
ali_trains = ca.dataset.train_files
|
24
|
+
|
25
|
+
ca.dataset.prepare(filters=filters)
|
26
|
+
s3_trains = ca.dataset.train_files
|
27
|
+
|
28
|
+
assert ali_trains == s3_trains
|
@@ -0,0 +1,77 @@
|
|
1
|
+
from vectordb_bench.backend.dataset import Dataset
|
2
|
+
import logging
|
3
|
+
import pytest
|
4
|
+
from pydantic import ValidationError
|
5
|
+
from vectordb_bench.backend.data_source import DatasetSource
|
6
|
+
|
7
|
+
|
8
|
+
log = logging.getLogger("vectordb_bench")
|
9
|
+
|
10
|
+
class TestDataSet:
|
11
|
+
def test_iter_dataset(self):
|
12
|
+
for ds in Dataset:
|
13
|
+
log.info(ds)
|
14
|
+
|
15
|
+
def test_cohere(self):
|
16
|
+
cohere = Dataset.COHERE.get(100_000)
|
17
|
+
log.info(cohere)
|
18
|
+
assert cohere.name == "Cohere"
|
19
|
+
assert cohere.size == 100_000
|
20
|
+
assert cohere.label == "SMALL"
|
21
|
+
assert cohere.dim == 768
|
22
|
+
|
23
|
+
def test_cohere_error(self):
|
24
|
+
with pytest.raises(ValidationError):
|
25
|
+
Dataset.COHERE.get(9999)
|
26
|
+
|
27
|
+
def test_iter_cohere(self):
|
28
|
+
cohere_10m = Dataset.COHERE.manager(10_000_000)
|
29
|
+
cohere_10m.prepare()
|
30
|
+
|
31
|
+
import time
|
32
|
+
before = time.time()
|
33
|
+
for i in cohere_10m:
|
34
|
+
log.debug(i.head(1))
|
35
|
+
|
36
|
+
dur_iter = time.time() - before
|
37
|
+
log.warning(f"iter through cohere_10m cost={dur_iter/60}min")
|
38
|
+
|
39
|
+
# pytest -sv tests/test_dataset.py::TestDataSet::test_iter_laion
|
40
|
+
def test_iter_laion(self):
|
41
|
+
laion_100m = Dataset.LAION.manager(100_000_000)
|
42
|
+
from vectordb_bench.backend.data_source import DatasetSource
|
43
|
+
laion_100m.prepare(source=DatasetSource.AliyunOSS)
|
44
|
+
|
45
|
+
import time
|
46
|
+
before = time.time()
|
47
|
+
for i in laion_100m:
|
48
|
+
log.debug(i.head(1))
|
49
|
+
|
50
|
+
dur_iter = time.time() - before
|
51
|
+
log.warning(f"iter through laion_100m cost={dur_iter/60}min")
|
52
|
+
|
53
|
+
def test_download_small(self):
|
54
|
+
openai_50k = Dataset.OPENAI.manager(50_000)
|
55
|
+
files = [
|
56
|
+
"test.parquet",
|
57
|
+
"neighbors.parquet",
|
58
|
+
"neighbors_head_1p.parquet",
|
59
|
+
"neighbors_tail_1p.parquet",
|
60
|
+
]
|
61
|
+
|
62
|
+
file_path = openai_50k.data_dir.joinpath("test.parquet")
|
63
|
+
import os
|
64
|
+
|
65
|
+
DatasetSource.S3.reader().read(
|
66
|
+
openai_50k.data.dir_name.lower(),
|
67
|
+
files=files,
|
68
|
+
local_ds_root=openai_50k.data_dir,
|
69
|
+
)
|
70
|
+
|
71
|
+
os.remove(file_path)
|
72
|
+
DatasetSource.AliyunOSS.reader().read(
|
73
|
+
openai_50k.data.dir_name.lower(),
|
74
|
+
files=files,
|
75
|
+
local_ds_root=openai_50k.data_dir,
|
76
|
+
)
|
77
|
+
|
@@ -37,3 +37,30 @@ class TestUtils:
|
|
37
37
|
log.info(f"recall: {res}, expected: {expected}")
|
38
38
|
assert res == expected
|
39
39
|
|
40
|
+
|
41
|
+
class TestGetFiles:
|
42
|
+
@pytest.mark.parametrize("train_count", [
|
43
|
+
1,
|
44
|
+
10,
|
45
|
+
50,
|
46
|
+
100,
|
47
|
+
])
|
48
|
+
def test_train_count(self, train_count):
|
49
|
+
files = utils.compose_train_files(train_count, True)
|
50
|
+
log.info(files)
|
51
|
+
|
52
|
+
assert len(files) == train_count
|
53
|
+
|
54
|
+
@pytest.mark.parametrize("use_shuffled", [True, False])
|
55
|
+
def test_use_shuffled(self, use_shuffled):
|
56
|
+
files = utils.compose_train_files(1, use_shuffled)
|
57
|
+
log.info(files)
|
58
|
+
|
59
|
+
trains = [f for f in files if "train" in f]
|
60
|
+
if use_shuffled:
|
61
|
+
for t in trains:
|
62
|
+
assert "shuffle_train" in t
|
63
|
+
else:
|
64
|
+
for t in trains:
|
65
|
+
assert "shuffle" not in t
|
66
|
+
assert "train" in t
|
@@ -54,8 +54,8 @@ class DB(Enum):
|
|
54
54
|
return ElasticCloud
|
55
55
|
|
56
56
|
if self == DB.QdrantCloud:
|
57
|
-
from .qdrant_cloud.qdrant_cloud import
|
58
|
-
return
|
57
|
+
from .qdrant_cloud.qdrant_cloud import QdrantCloud
|
58
|
+
return QdrantCloud
|
59
59
|
|
60
60
|
if self == DB.WeaviateCloud:
|
61
61
|
from .weaviate_cloud.weaviate_cloud import WeaviateCloud
|
@@ -142,8 +142,8 @@ class DB(Enum):
|
|
142
142
|
return WeaviateIndexConfig
|
143
143
|
|
144
144
|
if self == DB.PgVector:
|
145
|
-
from .pgvector.config import
|
146
|
-
return
|
145
|
+
from .pgvector.config import _pgvector_case_config
|
146
|
+
return _pgvector_case_config.get(index_type)
|
147
147
|
|
148
148
|
if self == DB.PgVectoRS:
|
149
149
|
from .pgvecto_rs.config import _pgvecto_rs_case_config
|
{vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/chroma/chroma.py
RENAMED
@@ -106,21 +106,9 @@ class ChromaClient(VectorDB):
|
|
106
106
|
"""
|
107
107
|
if filters:
|
108
108
|
# assumes benchmark test filters of format: {'metadata': '>=10000', 'id': 10000}
|
109
|
-
metadata_value = filters.get("metadata")
|
110
109
|
id_value = filters.get("id")
|
111
|
-
|
112
|
-
|
113
|
-
query_embeddings=query, n_results=k,
|
114
|
-
where={"$and": [{"id": {"$eq": id_value}},
|
115
|
-
{"id": {"$gt": metadata_value}}
|
116
|
-
]}
|
117
|
-
)
|
118
|
-
elif metadata_value:
|
119
|
-
results = self.collection.query(query_embeddings=query, n_results=k,
|
120
|
-
where={"id": {"$gt": metadata_value}})
|
121
|
-
else:
|
122
|
-
results = self.collection.query(query_embeddings=query, n_results=k,
|
123
|
-
where={"id": {"$eq": id_value}})
|
110
|
+
results = self.collection.query(query_embeddings=query, n_results=k,
|
111
|
+
where={"id": {"$gt": id_value}})
|
124
112
|
#return list of id's in results
|
125
113
|
return [int(i) for i in results.get('ids')[0]]
|
126
114
|
results = self.collection.query(query_embeddings=query, n_results=k)
|
{vectordb-bench-0.0.6 → vectordb_bench-0.0.8}/vectordb_bench/backend/clients/milvus/config.py
RENAMED
@@ -95,6 +95,24 @@ class IVFFlatConfig(MilvusIndexConfig, DBCaseConfig):
|
|
95
95
|
"metric_type": self.parse_metric(),
|
96
96
|
"params": {"nprobe": self.nprobe},
|
97
97
|
}
|
98
|
+
|
99
|
+
class IVFSQ8Config(MilvusIndexConfig, DBCaseConfig):
|
100
|
+
nlist: int
|
101
|
+
nprobe: int | None = None
|
102
|
+
index: IndexType = IndexType.IVFSQ8
|
103
|
+
|
104
|
+
def index_param(self) -> dict:
|
105
|
+
return {
|
106
|
+
"metric_type": self.parse_metric(),
|
107
|
+
"index_type": self.index.value,
|
108
|
+
"params": {"nlist": self.nlist},
|
109
|
+
}
|
110
|
+
|
111
|
+
def search_param(self) -> dict:
|
112
|
+
return {
|
113
|
+
"metric_type": self.parse_metric(),
|
114
|
+
"params": {"nprobe": self.nprobe},
|
115
|
+
}
|
98
116
|
|
99
117
|
|
100
118
|
class FLATConfig(MilvusIndexConfig, DBCaseConfig):
|
@@ -210,6 +228,7 @@ _milvus_case_config = {
|
|
210
228
|
IndexType.HNSW: HNSWConfig,
|
211
229
|
IndexType.DISKANN: DISKANNConfig,
|
212
230
|
IndexType.IVFFlat: IVFFlatConfig,
|
231
|
+
IndexType.IVFSQ8: IVFSQ8Config,
|
213
232
|
IndexType.Flat: FLATConfig,
|
214
233
|
IndexType.GPU_IVF_FLAT: GPUIVFFlatConfig,
|
215
234
|
IndexType.GPU_IVF_PQ: GPUIVFPQConfig,
|