vectordb-bench 0.0.10__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.env.example +1 -1
  2. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/PKG-INFO +228 -14
  3. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/README.md +218 -12
  4. vectordb_bench-0.0.12/fig/custom_case_run_test.png +0 -0
  5. vectordb_bench-0.0.12/fig/custom_dataset.png +0 -0
  6. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/pyproject.toml +9 -2
  7. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/__init__.py +19 -5
  8. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/assembler.py +1 -1
  9. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/cases.py +93 -27
  10. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/__init__.py +14 -0
  11. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/api.py +1 -1
  12. vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +159 -0
  13. vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/cli.py +44 -0
  14. vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/config.py +58 -0
  15. vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/run.py +125 -0
  16. vectordb_bench-0.0.12/vectordb_bench/backend/clients/milvus/cli.py +291 -0
  17. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/milvus/milvus.py +13 -6
  18. vectordb_bench-0.0.12/vectordb_bench/backend/clients/pgvector/cli.py +116 -0
  19. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvector/config.py +1 -1
  20. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvector/pgvector.py +7 -4
  21. vectordb_bench-0.0.12/vectordb_bench/backend/clients/redis/cli.py +74 -0
  22. vectordb_bench-0.0.12/vectordb_bench/backend/clients/test/cli.py +25 -0
  23. vectordb_bench-0.0.12/vectordb_bench/backend/clients/test/config.py +18 -0
  24. vectordb_bench-0.0.12/vectordb_bench/backend/clients/test/test.py +62 -0
  25. vectordb_bench-0.0.12/vectordb_bench/backend/clients/weaviate_cloud/cli.py +41 -0
  26. vectordb_bench-0.0.12/vectordb_bench/backend/clients/zilliz_cloud/cli.py +55 -0
  27. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/dataset.py +27 -5
  28. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/runner/mp_runner.py +14 -3
  29. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/runner/serial_runner.py +7 -3
  30. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/task_runner.py +76 -26
  31. vectordb_bench-0.0.12/vectordb_bench/cli/__init__.py +0 -0
  32. vectordb_bench-0.0.12/vectordb_bench/cli/cli.py +362 -0
  33. vectordb_bench-0.0.12/vectordb_bench/cli/vectordbbench.py +22 -0
  34. vectordb_bench-0.0.12/vectordb_bench/config-files/sample_config.yml +17 -0
  35. vectordb_bench-0.0.12/vectordb_bench/custom/custom_case.json +18 -0
  36. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/charts.py +6 -6
  37. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/data.py +23 -20
  38. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/expanderStyle.py +1 -1
  39. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/filters.py +20 -13
  40. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/headerIcon.py +1 -1
  41. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/priceTable.py +1 -1
  42. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/stPageConfig.py +1 -1
  43. vectordb_bench-0.0.12/vectordb_bench/frontend/components/concurrent/charts.py +79 -0
  44. vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/displayCustomCase.py +31 -0
  45. vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/displaypPrams.py +11 -0
  46. vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/getCustomConfig.py +40 -0
  47. vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/initStyle.py +15 -0
  48. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/autoRefresh.py +1 -1
  49. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/caseSelector.py +40 -28
  50. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -5
  51. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/dbSelector.py +8 -14
  52. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/generateTasks.py +3 -5
  53. vectordb_bench-0.0.12/vectordb_bench/frontend/components/run_test/initStyle.py +14 -0
  54. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/submitTask.py +13 -5
  55. vectordb_bench-0.0.12/vectordb_bench/frontend/components/tables/data.py +44 -0
  56. {vectordb_bench-0.0.10/vectordb_bench/frontend/const → vectordb_bench-0.0.12/vectordb_bench/frontend/config}/dbCaseConfigs.py +140 -32
  57. {vectordb_bench-0.0.10/vectordb_bench/frontend/const → vectordb_bench-0.0.12/vectordb_bench/frontend/config}/styles.py +2 -0
  58. vectordb_bench-0.0.12/vectordb_bench/frontend/pages/concurrent.py +65 -0
  59. vectordb_bench-0.0.12/vectordb_bench/frontend/pages/custom.py +64 -0
  60. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/pages/quries_per_dollar.py +5 -5
  61. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/pages/run_test.py +4 -0
  62. vectordb_bench-0.0.12/vectordb_bench/frontend/pages/tables.py +24 -0
  63. vectordb_bench-0.0.12/vectordb_bench/frontend/utils.py +22 -0
  64. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/vdb_benchmark.py +3 -3
  65. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/interface.py +21 -25
  66. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/metric.py +23 -1
  67. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/models.py +45 -1
  68. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/getLeaderboardData.py +1 -1
  69. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/PKG-INFO +228 -14
  70. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/SOURCES.txt +32 -3
  71. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/entry_points.txt +1 -0
  72. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/requires.txt +11 -1
  73. vectordb_bench-0.0.10/vectordb_bench/frontend/utils.py +0 -6
  74. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.devcontainer/Dockerfile +0 -0
  75. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.devcontainer/devcontainer.json +0 -0
  76. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.github/workflows/publish_package_on_release.yml +0 -0
  77. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.github/workflows/pull_request.yml +0 -0
  78. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.gitignore +0 -0
  79. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.ruff.toml +0 -0
  80. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/Dockerfile +0 -0
  81. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/LICENSE +0 -0
  82. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/Makefile +0 -0
  83. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/OWNERS +0 -0
  84. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/install/requirements_py3.11.txt +0 -0
  85. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/install.py +0 -0
  86. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/setup.cfg +0 -0
  87. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/conftest.py +0 -0
  88. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/pytest.ini +0 -0
  89. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_bench_runner.py +0 -0
  90. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_chroma.py +0 -0
  91. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_data_source.py +0 -0
  92. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_dataset.py +0 -0
  93. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_elasticsearch_cloud.py +0 -0
  94. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_models.py +0 -0
  95. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_redis.py +0 -0
  96. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_utils.py +0 -0
  97. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/ut_cases.py +0 -0
  98. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/__main__.py +0 -0
  99. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/__init__.py +0 -0
  100. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/chroma/chroma.py +0 -0
  101. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  102. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
  103. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
  104. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/milvus/config.py +0 -0
  105. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
  106. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
  107. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
  108. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
  109. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -0
  110. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +0 -0
  111. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/redis/config.py +0 -0
  112. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/redis/redis.py +0 -0
  113. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
  114. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
  115. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
  116. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  117. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/data_source.py +0 -0
  118. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/result_collector.py +0 -0
  119. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/runner/__init__.py +0 -0
  120. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/utils.py +0 -0
  121. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/base.py +0 -0
  122. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  123. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
  124. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  125. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  126. {vectordb_bench-0.0.10/vectordb_bench/frontend/const → vectordb_bench-0.0.12/vectordb_bench/frontend/config}/dbPrices.py +0 -0
  127. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/log_util.py +0 -0
  128. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  129. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  130. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
  131. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
  132. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
  133. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
  134. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  135. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  136. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  137. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  138. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  139. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  140. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
  141. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
  142. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -0
  143. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/dbPrices.json +0 -0
  144. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/leaderboard.json +0 -0
  145. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  146. {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -6,6 +6,6 @@
6
6
  # NUM_PER_BATCH=
7
7
  # DEFAULT_DATASET_URL=
8
8
 
9
- DATASET_LOCAL_DIR="/tmp/vector_db_bench/dataset"
9
+ DATASET_LOCAL_DIR="/tmp/vectordb_bench/dataset"
10
10
 
11
11
  # DROP_OLD = True
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vectordb-bench
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -10,6 +10,7 @@ Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.11
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
+ Requires-Dist: click
13
14
  Requires-Dist: pytz
14
15
  Requires-Dist: streamlit-autorefresh
15
16
  Requires-Dist: streamlit!=1.34.0
@@ -40,6 +41,9 @@ Requires-Dist: redis; extra == "all"
40
41
  Requires-Dist: chromadb; extra == "all"
41
42
  Requires-Dist: psycopg2; extra == "all"
42
43
  Requires-Dist: psycopg; extra == "all"
44
+ Requires-Dist: psycopg-binary; extra == "all"
45
+ Requires-Dist: opensearch-dsl==2.1.0; extra == "all"
46
+ Requires-Dist: opensearch-py==2.6.0; extra == "all"
43
47
  Provides-Extra: qdrant
44
48
  Requires-Dist: qdrant-client; extra == "qdrant"
45
49
  Provides-Extra: pinecone
@@ -49,14 +53,18 @@ Requires-Dist: weaviate-client; extra == "weaviate"
49
53
  Provides-Extra: elastic
50
54
  Requires-Dist: elasticsearch; extra == "elastic"
51
55
  Provides-Extra: pgvector
52
- Requires-Dist: pgvector; extra == "pgvector"
53
56
  Requires-Dist: psycopg; extra == "pgvector"
57
+ Requires-Dist: psycopg-binary; extra == "pgvector"
58
+ Requires-Dist: pgvector; extra == "pgvector"
54
59
  Provides-Extra: pgvecto-rs
55
60
  Requires-Dist: psycopg2; extra == "pgvecto-rs"
56
61
  Provides-Extra: redis
57
62
  Requires-Dist: redis; extra == "redis"
58
63
  Provides-Extra: chromadb
59
64
  Requires-Dist: chromadb; extra == "chromadb"
65
+ Provides-Extra: awsopensearch
66
+ Requires-Dist: awsopensearch; extra == "awsopensearch"
67
+ Provides-Extra: zilliz-cloud
60
68
 
61
69
  # VectorDBBench: A Benchmark Tool for VectorDB
62
70
 
@@ -87,24 +95,134 @@ pip install vectordb-bench[pinecone]
87
95
  ```
88
96
  All supported database clients:
89
97
 
90
- |Optional database client|install command|
91
- |---------------|---------------|
92
- |pymilvus(*default*)|`pip install vectordb-bench`|
93
- |all|`pip install vectordb-bench[all]`|
94
- |qdrant|`pip install vectordb-bench[qdrant]`|
95
- |pinecone|`pip install vectordb-bench[pinecone]`|
96
- |weaviate|`pip install vectordb-bench[weaviate]`|
97
- |elastic|`pip install vectordb-bench[elastic]`|
98
- |pgvector|`pip install vectordb-bench[pgvector]`|
99
- |pgvecto.rs|`pip install vectordb-bench[pgvecto_rs]`|
100
- |redis|`pip install vectordb-bench[redis]`|
101
- |chromadb|`pip install vectordb-bench[chromadb]`|
98
+ | Optional database client | install command |
99
+ |--------------------------|---------------------------------------------|
100
+ | pymilvus(*default*) | `pip install vectordb-bench` |
101
+ | all | `pip install vectordb-bench[all]` |
102
+ | qdrant | `pip install vectordb-bench[qdrant]` |
103
+ | pinecone | `pip install vectordb-bench[pinecone]` |
104
+ | weaviate | `pip install vectordb-bench[weaviate]` |
105
+ | elastic | `pip install vectordb-bench[elastic]` |
106
+ | pgvector | `pip install vectordb-bench[pgvector]` |
107
+ | pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
108
+ | redis | `pip install vectordb-bench[redis]` |
109
+ | chromadb | `pip install vectordb-bench[chromadb]` |
110
+ | awsopensearch | `pip install vectordb-bench[awsopensearch]` |
102
111
 
103
112
  ### Run
104
113
 
105
114
  ``` shell
106
115
  init_bench
107
116
  ```
117
+
118
+ OR:
119
+
120
+ ### Run from the command line.
121
+
122
+ ``` shell
123
+ vectordbbench [OPTIONS] COMMAND [ARGS]...
124
+ ```
125
+ To list the clients that can be run from the command line, execute: `vectordbbench --help`
126
+ ``` text
127
+ $ vectordbbench --help
128
+ Usage: vectordbbench [OPTIONS] COMMAND [ARGS]...
129
+
130
+ Options:
131
+ --help Show this message and exit.
132
+
133
+ Commands:
134
+ pgvectorhnsw
135
+ pgvectorivfflat
136
+ test
137
+ weaviate
138
+ ```
139
+ To list the options for each command, execute `vectordbbench [command] --help`
140
+
141
+ ```text
142
+ $ vectordbbench pgvectorhnsw --help
143
+ Usage: vectordbbench pgvectorhnsw [OPTIONS]
144
+
145
+ Options:
146
+ --config-file PATH Read configuration from yaml file
147
+ --drop-old / --skip-drop-old Drop old or skip [default: drop-old]
148
+ --load / --skip-load Load or skip [default: load]
149
+ --search-serial / --skip-search-serial
150
+ Search serial or skip [default: search-
151
+ serial]
152
+ --search-concurrent / --skip-search-concurrent
153
+ Search concurrent or skip [default: search-
154
+ concurrent]
155
+ --case-type [CapacityDim128|CapacityDim960|Performance768D100M|Performance768D10M|Performance768D1M|Performance768D10M1P|Performance768D1M1P|Performance768D10M99P|Performance768D1M99P|Performance1536D500K|Performance1536D5M|Performance1536D500K1P|Performance1536D5M1P|Performance1536D500K99P|Performance1536D5M99P|Performance1536D50K]
156
+ Case type
157
+ --db-label TEXT Db label, default: date in ISO format
158
+ [default: 2024-05-20T20:26:31.113290]
159
+ --dry-run Print just the configuration and exit
160
+ without running the tasks
161
+ --k INTEGER K value for number of nearest neighbors to
162
+ search [default: 100]
163
+ --concurrency-duration INTEGER Adjusts the duration in seconds of each
164
+ concurrency search [default: 30]
165
+ --num-concurrency TEXT Comma-separated list of concurrency values
166
+ to test during concurrent search [default:
167
+ 1,10,20]
168
+ --user-name TEXT Db username [required]
169
+ --password TEXT Db password [required]
170
+ --host TEXT Db host [required]
171
+ --db-name TEXT Db name [required]
172
+ --maintenance-work-mem TEXT Sets the maximum memory to be used for
173
+ maintenance operations (index creation). Can
174
+ be entered as string with unit like '64GB'
175
+ or as an integer number of KB.This will set
176
+ the parameters:
177
+ max_parallel_maintenance_workers,
178
+ max_parallel_workers &
179
+ table(parallel_workers)
180
+ --max-parallel-workers INTEGER Sets the maximum number of parallel
181
+ processes per maintenance operation (index
182
+ creation)
183
+ --m INTEGER hnsw m
184
+ --ef-construction INTEGER hnsw ef-construction
185
+ --ef-search INTEGER hnsw ef-search
186
+ --help Show this message and exit.
187
+ ```
188
+ #### Using a configuration file.
189
+
190
+ The vectordbbench command can optionally read some or all of its options from a YAML-formatted configuration file.
191
+
192
+ By default, configuration files are expected to be in vectordb_bench/config-files/; this can be overridden by setting
193
+ the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
194
+
195
+ The required format is:
196
+ ```yaml
197
+ commandname:
198
+ parameter_name: parameter_value
199
+ parameter_name: parameter_value
200
+ ```
201
+ Example:
202
+ ```yaml
203
+ pgvectorhnsw:
204
+ db_label: pgConfigTest
205
+ user_name: vectordbbench
206
+ password: vectordbbench
207
+ db_name: vectordbbench
208
+ host: localhost
209
+ m: 16
210
+ ef_construction: 128
211
+ ef_search: 128
212
+ milvushnsw:
213
+ skip_search_serial: True
214
+ case_type: Performance1536D50K
215
+ uri: http://localhost:19530
216
+ m: 16
217
+ ef_construction: 128
218
+ ef_search: 128
219
+ drop_old: False
220
+ load: False
221
+ ```
222
+ > Notes:
223
+ > - Options passed on the command line will override the configuration file
224
+ > - Parameter names use an _ not -
225
+
108
226
  ## What is VectorDBBench
109
227
  VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
110
228
 
@@ -232,6 +350,24 @@ Case No. | Case Type | Dataset Size | Filtering Rate | Results |
232
350
 
233
351
  Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
234
352
 
353
+ #### Custom Dataset for Performance case
354
+
355
+ Through the `/custom` page, users can customize their own performance case using local datasets. After saving, the corresponding case can be selected from the `/run_test` page to perform the test.
356
+
357
+ ![image](fig/custom_dataset.png)
358
+ ![image](fig/custom_case_run_test.png)
359
+
360
+ We have strict requirements for the dataset format; please follow them.
361
+ - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
362
+ - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
363
+ - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
364
+ - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
365
+
366
+ - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
367
+
368
+ - `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
369
+
370
+
235
371
  ## Goals
236
372
  Our goals of this benchmark are:
237
373
  ### Reproducibility & Usability
@@ -280,6 +416,7 @@ class NewDBCaseConfig(DBCaseConfig):
280
416
  # Implement optional case-specific configuration fields
281
417
  # ...
282
418
  ```
419
+
283
420
  **Step 3: Importing the DB Client and Updating Initialization**
284
421
 
285
422
  In this final step, you will import your DB client into clients/__init__.py and update the initialization process.
@@ -318,6 +455,83 @@ class DB(Enum):
318
455
  return NewClientCaseConfig
319
456
 
320
457
  ```
458
+ **Step 4: Implement new_client/cli.py and vectordb_bench/cli/vectordbbench.py**
459
+
460
+ In this (optional, but encouraged) step you will enable the test to be run from the command line.
461
+ 1. Navigate to the vectordb_bench/backend/clients/"client" directory.
462
+ 2. Inside the "client" folder, create a cli.py file.
463
+ Using zilliz as an example cli.py:
464
+ ```python
465
+ from typing import Annotated, Unpack
466
+
467
+ import click
468
+ import os
469
+ from pydantic import SecretStr
470
+
471
+ from vectordb_bench.cli.cli import (
472
+ CommonTypedDict,
473
+ cli,
474
+ click_parameter_decorators_from_typed_dict,
475
+ run,
476
+ )
477
+ from vectordb_bench.backend.clients import DB
478
+
479
+
480
+ class ZillizTypedDict(CommonTypedDict):
481
+ uri: Annotated[
482
+ str, click.option("--uri", type=str, help="uri connection string", required=True)
483
+ ]
484
+ user_name: Annotated[
485
+ str, click.option("--user-name", type=str, help="Db username", required=True)
486
+ ]
487
+ password: Annotated[
488
+ str,
489
+ click.option("--password",
490
+ type=str,
491
+ help="Zilliz password",
492
+ default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
493
+ show_default="$ZILLIZ_PASSWORD",
494
+ ),
495
+ ]
496
+ level: Annotated[
497
+ str,
498
+ click.option("--level", type=str, help="Zilliz index level", required=False),
499
+ ]
500
+
501
+
502
+ @cli.command()
503
+ @click_parameter_decorators_from_typed_dict(ZillizTypedDict)
504
+ def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
505
+ from .config import ZillizCloudConfig, AutoIndexConfig
506
+
507
+ run(
508
+ db=DB.ZillizCloud,
509
+ db_config=ZillizCloudConfig(
510
+ db_label=parameters["db_label"],
511
+ uri=SecretStr(parameters["uri"]),
512
+ user=parameters["user_name"],
513
+ password=SecretStr(parameters["password"]),
514
+ ),
515
+ db_case_config=AutoIndexConfig(
516
+ params={parameters["level"]},
517
+ ),
518
+ **parameters,
519
+ )
520
+ ```
521
+ 3. Update cli by adding:
522
+ 1. Add database specific options as an Annotated TypedDict, see ZillizTypedDict above.
523
+ 2. Add index configuration specific options as an Annotated TypedDict. (example: vectordb_bench/backend/clients/pgvector/cli.py)
524
+ 1. May not be needed if there is only one index config.
525
+ 2. Repeat for each index configuration, nesting them if possible.
526
+ 2. Add an index-config-specific function for each index type (see Zilliz above). The function name, in lowercase, will be the command name passed to the vectordbbench command.
527
+ 3. Update db_config and db_case_config to match client requirements
528
+ 4. Continue to add new functions for each index config.
529
+ 5. Import the client cli module and command to vectordb_bench/cli/vectordbbench.py (for databases with multiple commands (index configs), this only needs to be done for one command)
530
+
531
+ > cli modules with multiple index configs:
532
+ > - pgvector: vectordb_bench/backend/clients/pgvector/cli.py
533
+ > - milvus: vectordb_bench/backend/clients/milvus/cli.py
534
+
321
535
  That's it! You have successfully added a new DB client to the vectordb_bench project.
322
536
 
323
537
  ## Rules
@@ -27,24 +27,134 @@ pip install vectordb-bench[pinecone]
27
27
  ```
28
28
  All the database clients supported
29
29
 
30
- |Optional database client|install command|
31
- |---------------|---------------|
32
- |pymilvus(*default*)|`pip install vectordb-bench`|
33
- |all|`pip install vectordb-bench[all]`|
34
- |qdrant|`pip install vectordb-bench[qdrant]`|
35
- |pinecone|`pip install vectordb-bench[pinecone]`|
36
- |weaviate|`pip install vectordb-bench[weaviate]`|
37
- |elastic|`pip install vectordb-bench[elastic]`|
38
- |pgvector|`pip install vectordb-bench[pgvector]`|
39
- |pgvecto.rs|`pip install vectordb-bench[pgvecto_rs]`|
40
- |redis|`pip install vectordb-bench[redis]`|
41
- |chromadb|`pip install vectordb-bench[chromadb]`|
30
+ | Optional database client | install command |
31
+ |--------------------------|---------------------------------------------|
32
+ | pymilvus(*default*) | `pip install vectordb-bench` |
33
+ | all | `pip install vectordb-bench[all]` |
34
+ | qdrant | `pip install vectordb-bench[qdrant]` |
35
+ | pinecone | `pip install vectordb-bench[pinecone]` |
36
+ | weaviate | `pip install vectordb-bench[weaviate]` |
37
+ | elastic | `pip install vectordb-bench[elastic]` |
38
+ | pgvector | `pip install vectordb-bench[pgvector]` |
39
+ | pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
40
+ | redis | `pip install vectordb-bench[redis]` |
41
+ | chromadb | `pip install vectordb-bench[chromadb]` |
42
+ | awsopensearch | `pip install vectordb-bench[awsopensearch]` |
42
43
 
43
44
  ### Run
44
45
 
45
46
  ``` shell
46
47
  init_bench
47
48
  ```
49
+
50
+ OR:
51
+
52
+ ### Run from the command line.
53
+
54
+ ``` shell
55
+ vectordbbench [OPTIONS] COMMAND [ARGS]...
56
+ ```
57
+ To list the clients that are runnable via the commandline option, execute: `vectordbbench --help`
58
+ ``` text
59
+ $ vectordbbench --help
60
+ Usage: vectordbbench [OPTIONS] COMMAND [ARGS]...
61
+
62
+ Options:
63
+ --help Show this message and exit.
64
+
65
+ Commands:
66
+ pgvectorhnsw
67
+ pgvectorivfflat
68
+ test
69
+ weaviate
70
+ ```
71
+ To list the options for each command, execute `vectordbbench [command] --help`
72
+
73
+ ```text
74
+ $ vectordbbench pgvectorhnsw --help
75
+ Usage: vectordbbench pgvectorhnsw [OPTIONS]
76
+
77
+ Options:
78
+ --config-file PATH Read configuration from yaml file
79
+ --drop-old / --skip-drop-old Drop old or skip [default: drop-old]
80
+ --load / --skip-load Load or skip [default: load]
81
+ --search-serial / --skip-search-serial
82
+ Search serial or skip [default: search-
83
+ serial]
84
+ --search-concurrent / --skip-search-concurrent
85
+ Search concurrent or skip [default: search-
86
+ concurrent]
87
+ --case-type [CapacityDim128|CapacityDim960|Performance768D100M|Performance768D10M|Performance768D1M|Performance768D10M1P|Performance768D1M1P|Performance768D10M99P|Performance768D1M99P|Performance1536D500K|Performance1536D5M|Performance1536D500K1P|Performance1536D5M1P|Performance1536D500K99P|Performance1536D5M99P|Performance1536D50K]
88
+ Case type
89
+ --db-label TEXT Db label, default: date in ISO format
90
+ [default: 2024-05-20T20:26:31.113290]
91
+ --dry-run Print just the configuration and exit
92
+ without running the tasks
93
+ --k INTEGER K value for number of nearest neighbors to
94
+ search [default: 100]
95
+ --concurrency-duration INTEGER Adjusts the duration in seconds of each
96
+ concurrency search [default: 30]
97
+ --num-concurrency TEXT Comma-separated list of concurrency values
98
+ to test during concurrent search [default:
99
+ 1,10,20]
100
+ --user-name TEXT Db username [required]
101
+ --password TEXT Db password [required]
102
+ --host TEXT Db host [required]
103
+ --db-name TEXT Db name [required]
104
+ --maintenance-work-mem TEXT Sets the maximum memory to be used for
105
+ maintenance operations (index creation). Can
106
+ be entered as string with unit like '64GB'
107
+ or as an integer number of KB.This will set
108
+ the parameters:
109
+ max_parallel_maintenance_workers,
110
+ max_parallel_workers &
111
+ table(parallel_workers)
112
+ --max-parallel-workers INTEGER Sets the maximum number of parallel
113
+ processes per maintenance operation (index
114
+ creation)
115
+ --m INTEGER hnsw m
116
+ --ef-construction INTEGER hnsw ef-construction
117
+ --ef-search INTEGER hnsw ef-search
118
+ --help Show this message and exit.
119
+ ```
120
+ #### Using a configuration file.
121
+
122
+ The vectordbbench command can optionally read some or all of the options from a yaml formatted configuration file.
123
+
124
+ By default, configuration files are expected to be in vectordb_bench/config-files/, this can be overridden by setting
125
+ the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
126
+
127
+ The required format is:
128
+ ```yaml
129
+ commandname:
130
+ parameter_name: parameter_value
131
+ parameter_name: parameter_value
132
+ ```
133
+ Example:
134
+ ```yaml
135
+ pgvectorhnsw:
136
+ db_label: pgConfigTest
137
+ user_name: vectordbbench
138
+ password: vectordbbench
139
+ db_name: vectordbbench
140
+ host: localhost
141
+ m: 16
142
+ ef_construction: 128
143
+ ef_search: 128
144
+ milvushnsw:
145
+ skip_search_serial: True
146
+ case_type: Performance1536D50K
147
+ uri: http://localhost:19530
148
+ m: 16
149
+ ef_construction: 128
150
+ ef_search: 128
151
+ drop_old: False
152
+ load: False
153
+ ```
154
+ > Notes:
155
+ > - Options passed on the command line will override the configuration file
156
+ > - Parameter names use an underscore (`_`), not a hyphen (`-`)
157
+
48
158
  ## What is VectorDBBench
49
159
  VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
50
160
 
@@ -172,6 +282,24 @@ Case No. | Case Type | Dataset Size | Filtering Rate | Results |
172
282
 
173
283
  Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
174
284
 
285
+ #### Custom Dataset for Performance case
286
+
287
+ Through the `/custom` page, users can customize their own performance case using local datasets. After saving, the corresponding case can be selected from the `/run_test` page to perform the test.
288
+
289
+ ![image](fig/custom_dataset.png)
290
+ ![image](fig/custom_case_run_test.png)
291
+
292
+ We have strict requirements for the dataset format; please follow them.
293
+ - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
294
+ - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
295
+ - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
296
+ - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
297
+
298
+ - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
299
+
300
+ - `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
301
+
302
+
175
303
  ## Goals
176
304
  Our goals of this benchmark are:
177
305
  ### Reproducibility & Usability
@@ -220,6 +348,7 @@ class NewDBCaseConfig(DBCaseConfig):
220
348
  # Implement optional case-specific configuration fields
221
349
  # ...
222
350
  ```
351
+
223
352
  **Step 3: Importing the DB Client and Updating Initialization**
224
353
 
225
354
  In this final step, you will import your DB client into clients/__init__.py and update the initialization process.
@@ -258,6 +387,83 @@ class DB(Enum):
258
387
  return NewClientCaseConfig
259
388
 
260
389
  ```
390
+ **Step 4: Implement new_client/cli.py and vectordb_bench/cli/vectordbbench.py**
391
+
392
+ In this (optional, but encouraged) step you will enable the test to be run from the command line.
393
+ 1. Navigate to the vectordb_bench/backend/clients/"client" directory.
394
+ 2. Inside the "client" folder, create a cli.py file.
395
+ Using zilliz as an example cli.py:
396
+ ```python
397
+ from typing import Annotated, Unpack
398
+
399
+ import click
400
+ import os
401
+ from pydantic import SecretStr
402
+
403
+ from vectordb_bench.cli.cli import (
404
+ CommonTypedDict,
405
+ cli,
406
+ click_parameter_decorators_from_typed_dict,
407
+ run,
408
+ )
409
+ from vectordb_bench.backend.clients import DB
410
+
411
+
412
+ class ZillizTypedDict(CommonTypedDict):
413
+ uri: Annotated[
414
+ str, click.option("--uri", type=str, help="uri connection string", required=True)
415
+ ]
416
+ user_name: Annotated[
417
+ str, click.option("--user-name", type=str, help="Db username", required=True)
418
+ ]
419
+ password: Annotated[
420
+ str,
421
+ click.option("--password",
422
+ type=str,
423
+ help="Zilliz password",
424
+ default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
425
+ show_default="$ZILLIZ_PASSWORD",
426
+ ),
427
+ ]
428
+ level: Annotated[
429
+ str,
430
+ click.option("--level", type=str, help="Zilliz index level", required=False),
431
+ ]
432
+
433
+
434
+ @cli.command()
435
+ @click_parameter_decorators_from_typed_dict(ZillizTypedDict)
436
+ def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
437
+ from .config import ZillizCloudConfig, AutoIndexConfig
438
+
439
+ run(
440
+ db=DB.ZillizCloud,
441
+ db_config=ZillizCloudConfig(
442
+ db_label=parameters["db_label"],
443
+ uri=SecretStr(parameters["uri"]),
444
+ user=parameters["user_name"],
445
+ password=SecretStr(parameters["password"]),
446
+ ),
447
+ db_case_config=AutoIndexConfig(
448
+ params={parameters["level"]},
449
+ ),
450
+ **parameters,
451
+ )
452
+ ```
453
+ 3. Update cli by adding:
454
+ 1. Add database specific options as an Annotated TypedDict, see ZillizTypedDict above.
455
+ 2. Add index configuration specific options as an Annotated TypedDict. (example: vectordb_bench/backend/clients/pgvector/cli.py)
456
+ 1. May not be needed if there is only one index config.
457
+ 2. Repeat for each index configuration, nesting them if possible.
458
+ 2. Add an index config specific function for each index type, see Zilliz above. The function name, in lowercase, will be the command name passed to the vectordbbench command.
459
+ 3. Update db_config and db_case_config to match client requirements
460
+ 4. Continue to add new functions for each index config.
461
+ 5. Import the client cli module and command to vectordb_bench/cli/vectordbbench.py (for databases with multiple commands (index configs), this only needs to be done for one command)
462
+
463
+ > cli modules with multiple index configs:
464
+ > - pgvector: vectordb_bench/backend/clients/pgvector/cli.py
465
+ > - milvus: vectordb_bench/backend/clients/milvus/cli.py
466
+
261
467
  That's it! You have successfully added a new DB client to the vectordb_bench project.
262
468
 
263
469
  ## Rules
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
 
8
8
  [tool.setuptools.packages.find]
9
9
  where = ["."]
10
- include = ["vectordb_bench"]
10
+ include = ["vectordb_bench", "vectordb_bench.cli"]
11
11
 
12
12
  [project]
13
13
  name = "vectordb-bench"
@@ -24,6 +24,7 @@ classifiers = [
24
24
  "Operating System :: OS Independent",
25
25
  ]
26
26
  dependencies = [
27
+ "click",
27
28
  "pytz",
28
29
  "streamlit-autorefresh",
29
30
  "streamlit!=1.34.0",
@@ -60,21 +61,27 @@ all = [
60
61
  "chromadb",
61
62
  "psycopg2",
62
63
  "psycopg",
64
+ "psycopg-binary",
65
+ "opensearch-dsl==2.1.0",
66
+ "opensearch-py==2.6.0",
63
67
  ]
64
68
 
65
69
  qdrant = [ "qdrant-client" ]
66
70
  pinecone = [ "pinecone-client" ]
67
71
  weaviate = [ "weaviate-client" ]
68
72
  elastic = [ "elasticsearch" ]
69
- pgvector = [ "pgvector", "psycopg" ]
73
+ pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
70
74
  pgvecto_rs = [ "psycopg2" ]
71
75
  redis = [ "redis" ]
72
76
  chromadb = [ "chromadb" ]
77
+ awsopensearch = [ "awsopensearch" ]
78
+ zilliz_cloud = []
73
79
 
74
80
  [project.urls]
75
81
  "repository" = "https://github.com/zilliztech/VectorDBBench"
76
82
 
77
83
  [project.scripts]
78
84
  init_bench = "vectordb_bench.__main__:main"
85
+ vectordbbench = "vectordb_bench.cli.vectordbbench:cli"
79
86
 
80
87
  [tool.setuptools_scm]
@@ -1,11 +1,13 @@
1
- import environs
2
1
  import inspect
3
2
  import pathlib
4
- from . import log_util
5
3
 
4
+ import environs
5
+
6
+ from . import log_util
6
7
 
7
8
  env = environs.Env()
8
- env.read_env(".env")
9
+ env.read_env(".env", False)
10
+
9
11
 
10
12
  class config:
11
13
  ALIYUN_OSS_URL = "assets.zilliz.com.cn/benchmark/"
@@ -19,9 +21,21 @@ class config:
19
21
 
20
22
  DROP_OLD = env.bool("DROP_OLD", True)
21
23
  USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
22
- NUM_CONCURRENCY = [1, 5, 10, 15, 20, 25, 30, 35]
23
24
 
24
- RESULTS_LOCAL_DIR = pathlib.Path(__file__).parent.joinpath("results")
25
+ NUM_CONCURRENCY = env.list("NUM_CONCURRENCY", [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100], subcast=int )
26
+
27
+ CONCURRENCY_DURATION = 30
28
+
29
+ RESULTS_LOCAL_DIR = env.path(
30
+ "RESULTS_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("results")
31
+ )
32
+ CONFIG_LOCAL_DIR = env.path(
33
+ "CONFIG_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("config-files")
34
+ )
35
+
36
+
37
+ K_DEFAULT = 100 # default return top k nearest neighbors during search
38
+ CUSTOM_CONFIG_DIR = pathlib.Path(__file__).parent.joinpath("custom/custom_case.json")
25
39
 
26
40
  CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
27
41
  LOAD_TIMEOUT_DEFAULT = 2.5 * 3600 # 2.5h
@@ -14,7 +14,7 @@ class Assembler:
14
14
  def assemble(cls, run_id , task: TaskConfig, source: DatasetSource) -> CaseRunner:
15
15
  c_cls = task.case_config.case_id.case_cls
16
16
 
17
- c = c_cls()
17
+ c = c_cls(task.case_config.custom_case)
18
18
  if type(task.db_case_config) != EmptyDBCaseConfig:
19
19
  task.db_case_config.metric_type = c.dataset.data.metric_type
20
20