vectordb-bench 0.0.11__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/PKG-INFO +36 -13
  2. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/README.md +31 -12
  3. vectordb_bench-0.0.12/fig/custom_case_run_test.png +0 -0
  4. vectordb_bench-0.0.12/fig/custom_dataset.png +0 -0
  5. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/pyproject.toml +4 -0
  6. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/__init__.py +1 -0
  7. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/assembler.py +1 -1
  8. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/cases.py +64 -18
  9. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/__init__.py +13 -0
  10. vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +159 -0
  11. vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/cli.py +44 -0
  12. vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/config.py +58 -0
  13. vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/run.py +125 -0
  14. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/dataset.py +27 -5
  15. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/cli/vectordbbench.py +2 -0
  16. vectordb_bench-0.0.12/vectordb_bench/custom/custom_case.json +18 -0
  17. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/charts.py +6 -6
  18. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/data.py +12 -12
  19. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/expanderStyle.py +1 -1
  20. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/filters.py +20 -13
  21. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/headerIcon.py +1 -1
  22. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/priceTable.py +1 -1
  23. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/stPageConfig.py +1 -1
  24. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/concurrent/charts.py +26 -29
  25. vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/displayCustomCase.py +31 -0
  26. vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/displaypPrams.py +11 -0
  27. vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/getCustomConfig.py +40 -0
  28. vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/initStyle.py +15 -0
  29. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/autoRefresh.py +1 -1
  30. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/caseSelector.py +40 -28
  31. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -5
  32. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/dbSelector.py +2 -14
  33. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/generateTasks.py +3 -5
  34. vectordb_bench-0.0.12/vectordb_bench/frontend/components/run_test/initStyle.py +14 -0
  35. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/submitTask.py +1 -1
  36. {vectordb_bench-0.0.11/vectordb_bench/frontend/const → vectordb_bench-0.0.12/vectordb_bench/frontend/config}/dbCaseConfigs.py +138 -31
  37. {vectordb_bench-0.0.11/vectordb_bench/frontend/const → vectordb_bench-0.0.12/vectordb_bench/frontend/config}/styles.py +2 -0
  38. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/pages/concurrent.py +11 -18
  39. vectordb_bench-0.0.12/vectordb_bench/frontend/pages/custom.py +64 -0
  40. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/pages/quries_per_dollar.py +5 -5
  41. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/pages/run_test.py +4 -0
  42. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/pages/tables.py +2 -2
  43. vectordb_bench-0.0.12/vectordb_bench/frontend/utils.py +22 -0
  44. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/vdb_benchmark.py +3 -3
  45. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/models.py +8 -4
  46. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/getLeaderboardData.py +1 -1
  47. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/PKG-INFO +36 -13
  48. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/SOURCES.txt +16 -3
  49. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/requires.txt +5 -0
  50. vectordb_bench-0.0.11/vectordb_bench/frontend/utils.py +0 -6
  51. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/.devcontainer/Dockerfile +0 -0
  52. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/.devcontainer/devcontainer.json +0 -0
  53. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/.env.example +0 -0
  54. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/.github/workflows/publish_package_on_release.yml +0 -0
  55. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/.github/workflows/pull_request.yml +0 -0
  56. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/.gitignore +0 -0
  57. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/.ruff.toml +0 -0
  58. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/Dockerfile +0 -0
  59. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/LICENSE +0 -0
  60. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/Makefile +0 -0
  61. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/OWNERS +0 -0
  62. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/install/requirements_py3.11.txt +0 -0
  63. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/install.py +0 -0
  64. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/setup.cfg +0 -0
  65. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/conftest.py +0 -0
  66. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/pytest.ini +0 -0
  67. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/test_bench_runner.py +0 -0
  68. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/test_chroma.py +0 -0
  69. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/test_data_source.py +0 -0
  70. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/test_dataset.py +0 -0
  71. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/test_elasticsearch_cloud.py +0 -0
  72. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/test_models.py +0 -0
  73. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/test_redis.py +0 -0
  74. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/test_utils.py +0 -0
  75. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/tests/ut_cases.py +0 -0
  76. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/__main__.py +0 -0
  77. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/__init__.py +0 -0
  78. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/api.py +0 -0
  79. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/chroma/chroma.py +0 -0
  80. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  81. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
  82. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
  83. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/milvus/cli.py +0 -0
  84. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/milvus/config.py +0 -0
  85. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/milvus/milvus.py +0 -0
  86. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
  87. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
  88. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvector/cli.py +0 -0
  89. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvector/config.py +0 -0
  90. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -0
  91. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
  92. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
  93. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -0
  94. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +0 -0
  95. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/redis/cli.py +0 -0
  96. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/redis/config.py +0 -0
  97. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/redis/redis.py +0 -0
  98. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/test/cli.py +0 -0
  99. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/test/config.py +0 -0
  100. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/test/test.py +0 -0
  101. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -0
  102. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
  103. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
  104. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
  105. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
  106. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  107. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/data_source.py +0 -0
  108. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/result_collector.py +0 -0
  109. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/runner/__init__.py +0 -0
  110. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/runner/mp_runner.py +0 -0
  111. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/runner/serial_runner.py +0 -0
  112. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/task_runner.py +0 -0
  113. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/backend/utils.py +0 -0
  114. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/base.py +0 -0
  115. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/cli/__init__.py +0 -0
  116. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/cli/cli.py +0 -0
  117. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/config-files/sample_config.yml +0 -0
  118. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  119. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
  120. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  121. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  122. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/tables/data.py +0 -0
  123. {vectordb_bench-0.0.11/vectordb_bench/frontend/const → vectordb_bench-0.0.12/vectordb_bench/frontend/config}/dbPrices.py +0 -0
  124. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/interface.py +0 -0
  125. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/log_util.py +0 -0
  126. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/metric.py +0 -0
  127. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  128. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  129. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
  130. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
  131. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
  132. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
  133. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  134. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  135. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  136. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  137. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  138. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  139. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
  140. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
  141. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -0
  142. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/dbPrices.json +0 -0
  143. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench/results/leaderboard.json +0 -0
  144. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  145. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/entry_points.txt +0 -0
  146. {vectordb_bench-0.0.11 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vectordb-bench
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -42,6 +42,8 @@ Requires-Dist: chromadb; extra == "all"
42
42
  Requires-Dist: psycopg2; extra == "all"
43
43
  Requires-Dist: psycopg; extra == "all"
44
44
  Requires-Dist: psycopg-binary; extra == "all"
45
+ Requires-Dist: opensearch-dsl==2.1.0; extra == "all"
46
+ Requires-Dist: opensearch-py==2.6.0; extra == "all"
45
47
  Provides-Extra: qdrant
46
48
  Requires-Dist: qdrant-client; extra == "qdrant"
47
49
  Provides-Extra: pinecone
@@ -60,6 +62,8 @@ Provides-Extra: redis
60
62
  Requires-Dist: redis; extra == "redis"
61
63
  Provides-Extra: chromadb
62
64
  Requires-Dist: chromadb; extra == "chromadb"
65
+ Provides-Extra: awsopensearch
66
+ Requires-Dist: awsopensearch; extra == "awsopensearch"
63
67
  Provides-Extra: zilliz-cloud
64
68
 
65
69
  # VectorDBBench: A Benchmark Tool for VectorDB
@@ -91,18 +95,19 @@ pip install vectordb-bench[pinecone]
91
95
  ```
92
96
  All supported database clients:
93
97
 
94
- |Optional database client|install command|
95
- |---------------|---------------|
96
- |pymilvus(*default*)|`pip install vectordb-bench`|
97
- |all|`pip install vectordb-bench[all]`|
98
- |qdrant|`pip install vectordb-bench[qdrant]`|
99
- |pinecone|`pip install vectordb-bench[pinecone]`|
100
- |weaviate|`pip install vectordb-bench[weaviate]`|
101
- |elastic|`pip install vectordb-bench[elastic]`|
102
- |pgvector|`pip install vectordb-bench[pgvector]`|
103
- |pgvecto.rs|`pip install vectordb-bench[pgvecto_rs]`|
104
- |redis|`pip install vectordb-bench[redis]`|
105
- |chromadb|`pip install vectordb-bench[chromadb]`|
98
+ | Optional database client | install command |
99
+ |--------------------------|---------------------------------------------|
100
+ | pymilvus(*default*) | `pip install vectordb-bench` |
101
+ | all | `pip install vectordb-bench[all]` |
102
+ | qdrant | `pip install vectordb-bench[qdrant]` |
103
+ | pinecone | `pip install vectordb-bench[pinecone]` |
104
+ | weaviate | `pip install vectordb-bench[weaviate]` |
105
+ | elastic | `pip install vectordb-bench[elastic]` |
106
+ | pgvector | `pip install vectordb-bench[pgvector]` |
107
+ | pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
108
+ | redis | `pip install vectordb-bench[redis]` |
109
+ | chromadb | `pip install vectordb-bench[chromadb]` |
110
+ | awsopensearch | `pip install vectordb-bench[awsopensearch]` |
106
111
 
107
112
  ### Run
108
113
 
@@ -345,6 +350,24 @@ Case No. | Case Type | Dataset Size | Filtering Rate | Results |
345
350
 
346
351
  Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
347
352
 
353
+ #### Custom Dataset for Performance case
354
+
355
+ Through the `/custom` page, users can customize their own performance case using local datasets. After saving, the corresponding case can be selected from the `/run_test` page to perform the test.
356
+
357
+ ![image](fig/custom_dataset.png)
358
+ ![image](fig/custom_case_run_test.png)
359
+
360
+ We have strict requirements for the dataset format; please follow them.
361
+ - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
362
+ - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
363
+ - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
364
+ - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
365
+
366
+ - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
367
+
368
+ - `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
369
+
370
+
348
371
  ## Goals
349
372
  Our goals of this benchmark are:
350
373
  ### Reproducibility & Usability
@@ -27,18 +27,19 @@ pip install vectordb-bench[pinecone]
27
27
  ```
28
28
  All supported database clients:
29
29
 
30
- |Optional database client|install command|
31
- |---------------|---------------|
32
- |pymilvus(*default*)|`pip install vectordb-bench`|
33
- |all|`pip install vectordb-bench[all]`|
34
- |qdrant|`pip install vectordb-bench[qdrant]`|
35
- |pinecone|`pip install vectordb-bench[pinecone]`|
36
- |weaviate|`pip install vectordb-bench[weaviate]`|
37
- |elastic|`pip install vectordb-bench[elastic]`|
38
- |pgvector|`pip install vectordb-bench[pgvector]`|
39
- |pgvecto.rs|`pip install vectordb-bench[pgvecto_rs]`|
40
- |redis|`pip install vectordb-bench[redis]`|
41
- |chromadb|`pip install vectordb-bench[chromadb]`|
30
+ | Optional database client | install command |
31
+ |--------------------------|---------------------------------------------|
32
+ | pymilvus(*default*) | `pip install vectordb-bench` |
33
+ | all | `pip install vectordb-bench[all]` |
34
+ | qdrant | `pip install vectordb-bench[qdrant]` |
35
+ | pinecone | `pip install vectordb-bench[pinecone]` |
36
+ | weaviate | `pip install vectordb-bench[weaviate]` |
37
+ | elastic | `pip install vectordb-bench[elastic]` |
38
+ | pgvector | `pip install vectordb-bench[pgvector]` |
39
+ | pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
40
+ | redis | `pip install vectordb-bench[redis]` |
41
+ | chromadb | `pip install vectordb-bench[chromadb]` |
42
+ | awsopensearch | `pip install vectordb-bench[awsopensearch]` |
42
43
 
43
44
  ### Run
44
45
 
@@ -281,6 +282,24 @@ Case No. | Case Type | Dataset Size | Filtering Rate | Results |
281
282
 
282
283
  Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
283
284
 
285
+ #### Custom Dataset for Performance case
286
+
287
+ Through the `/custom` page, users can customize their own performance case using local datasets. After saving, the corresponding case can be selected from the `/run_test` page to perform the test.
288
+
289
+ ![image](fig/custom_dataset.png)
290
+ ![image](fig/custom_case_run_test.png)
291
+
292
+ We have strict requirements for the dataset format; please follow them.
293
+ - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
294
+ - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
295
+ - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
296
+ - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
297
+
298
+ - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
299
+
300
+ - `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
301
+
302
+
284
303
  ## Goals
285
304
  Our goals of this benchmark are:
286
305
  ### Reproducibility & Usability
@@ -62,6 +62,8 @@ all = [
62
62
  "psycopg2",
63
63
  "psycopg",
64
64
  "psycopg-binary",
65
+ "opensearch-dsl==2.1.0",
66
+ "opensearch-py==2.6.0",
65
67
  ]
66
68
 
67
69
  qdrant = [ "qdrant-client" ]
@@ -72,6 +74,7 @@ pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
72
74
  pgvecto_rs = [ "psycopg2" ]
73
75
  redis = [ "redis" ]
74
76
  chromadb = [ "chromadb" ]
77
+ awsopensearch = [ "awsopensearch" ]
75
78
  zilliz_cloud = []
76
79
 
77
80
  [project.urls]
@@ -80,4 +83,5 @@ zilliz_cloud = []
80
83
  [project.scripts]
81
84
  init_bench = "vectordb_bench.__main__:main"
82
85
  vectordbbench = "vectordb_bench.cli.vectordbbench:cli"
86
+
83
87
  [tool.setuptools_scm]
@@ -35,6 +35,7 @@ class config:
35
35
 
36
36
 
37
37
  K_DEFAULT = 100 # default return top k nearest neighbors during search
38
+ CUSTOM_CONFIG_DIR = pathlib.Path(__file__).parent.joinpath("custom/custom_case.json")
38
39
 
39
40
  CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
40
41
  LOAD_TIMEOUT_DEFAULT = 2.5 * 3600 # 2.5h
@@ -14,7 +14,7 @@ class Assembler:
14
14
  def assemble(cls, run_id , task: TaskConfig, source: DatasetSource) -> CaseRunner:
15
15
  c_cls = task.case_config.case_id.case_cls
16
16
 
17
- c = c_cls()
17
+ c = c_cls(task.case_config.custom_case)
18
18
  if type(task.db_case_config) != EmptyDBCaseConfig:
19
19
  task.db_case_config.metric_type = c.dataset.data.metric_type
20
20
 
@@ -4,9 +4,13 @@ from enum import Enum, auto
4
4
  from typing import Type
5
5
 
6
6
  from vectordb_bench import config
7
+ from vectordb_bench.backend.clients.api import MetricType
7
8
  from vectordb_bench.base import BaseModel
9
+ from vectordb_bench.frontend.components.custom.getCustomConfig import (
10
+ CustomDatasetConfig,
11
+ )
8
12
 
9
- from .dataset import Dataset, DatasetManager
13
+ from .dataset import CustomDataset, Dataset, DatasetManager
10
14
 
11
15
 
12
16
  log = logging.getLogger(__name__)
@@ -44,25 +48,24 @@ class CaseType(Enum):
44
48
  Performance1536D50K = 50
45
49
 
46
50
  Custom = 100
51
+ PerformanceCustomDataset = 101
47
52
 
48
- @property
49
53
  def case_cls(self, custom_configs: dict | None = None) -> Type["Case"]:
50
- if self not in type2case:
51
- raise NotImplementedError(f"Case {self} has not implemented. You can add it manually to vectordb_bench.backend.cases.type2case or define a custom_configs['custom_cls']")
52
- return type2case[self]
54
+ if custom_configs is None:
55
+ return type2case.get(self)()
56
+ else:
57
+ return type2case.get(self)(**custom_configs)
53
58
 
54
- @property
55
- def case_name(self) -> str:
56
- c = self.case_cls
59
+ def case_name(self, custom_configs: dict | None = None) -> str:
60
+ c = self.case_cls(custom_configs)
57
61
  if c is not None:
58
- return c().name
62
+ return c.name
59
63
  raise ValueError("Case unsupported")
60
64
 
61
- @property
62
- def case_description(self) -> str:
63
- c = self.case_cls
65
+ def case_description(self, custom_configs: dict | None = None) -> str:
66
+ c = self.case_cls(custom_configs)
64
67
  if c is not None:
65
- return c().description
68
+ return c.description
66
69
  raise ValueError("Case unsupported")
67
70
 
68
71
 
@@ -289,26 +292,69 @@ Results will show index building time, recall, and maximum QPS."""
289
292
  optimize_timeout: float | int | None = 15 * 60
290
293
 
291
294
 
295
+ def metric_type_map(s: str) -> MetricType:
296
+ if s.lower() == "cosine":
297
+ return MetricType.COSINE
298
+ if s.lower() == "l2" or s.lower() == "euclidean":
299
+ return MetricType.L2
300
+ if s.lower() == "ip":
301
+ return MetricType.IP
302
+ err_msg = f"Not support metric_type: {s}"
303
+ log.error(err_msg)
304
+ raise RuntimeError(err_msg)
305
+
306
+
307
+ class PerformanceCustomDataset(PerformanceCase):
308
+ case_id: CaseType = CaseType.PerformanceCustomDataset
309
+ name: str = "Performance With Custom Dataset"
310
+ description: str = ""
311
+ dataset: DatasetManager
312
+
313
+ def __init__(
314
+ self,
315
+ name,
316
+ description,
317
+ load_timeout,
318
+ optimize_timeout,
319
+ dataset_config,
320
+ **kwargs,
321
+ ):
322
+ dataset_config = CustomDatasetConfig(**dataset_config)
323
+ dataset = CustomDataset(
324
+ name=dataset_config.name,
325
+ size=dataset_config.size,
326
+ dim=dataset_config.dim,
327
+ metric_type=metric_type_map(dataset_config.metric_type),
328
+ use_shuffled=dataset_config.use_shuffled,
329
+ with_gt=dataset_config.with_gt,
330
+ dir=dataset_config.dir,
331
+ file_num=dataset_config.file_count,
332
+ )
333
+ super().__init__(
334
+ name=name,
335
+ description=description,
336
+ load_timeout=load_timeout,
337
+ optimize_timeout=optimize_timeout,
338
+ dataset=DatasetManager(data=dataset),
339
+ )
340
+
341
+
292
342
  type2case = {
293
343
  CaseType.CapacityDim960: CapacityDim960,
294
344
  CaseType.CapacityDim128: CapacityDim128,
295
-
296
345
  CaseType.Performance768D100M: Performance768D100M,
297
346
  CaseType.Performance768D10M: Performance768D10M,
298
347
  CaseType.Performance768D1M: Performance768D1M,
299
-
300
348
  CaseType.Performance768D10M1P: Performance768D10M1P,
301
349
  CaseType.Performance768D1M1P: Performance768D1M1P,
302
350
  CaseType.Performance768D10M99P: Performance768D10M99P,
303
351
  CaseType.Performance768D1M99P: Performance768D1M99P,
304
-
305
352
  CaseType.Performance1536D500K: Performance1536D500K,
306
353
  CaseType.Performance1536D5M: Performance1536D5M,
307
-
308
354
  CaseType.Performance1536D500K1P: Performance1536D500K1P,
309
355
  CaseType.Performance1536D5M1P: Performance1536D5M1P,
310
-
311
356
  CaseType.Performance1536D500K99P: Performance1536D500K99P,
312
357
  CaseType.Performance1536D5M99P: Performance1536D5M99P,
313
358
  CaseType.Performance1536D50K: Performance1536D50K,
359
+ CaseType.PerformanceCustomDataset: PerformanceCustomDataset,
314
360
  }
@@ -32,6 +32,7 @@ class DB(Enum):
32
32
  PgVectoRS = "PgVectoRS"
33
33
  Redis = "Redis"
34
34
  Chroma = "Chroma"
35
+ AWSOpenSearch = "OpenSearch"
35
36
  Test = "test"
36
37
 
37
38
 
@@ -78,6 +79,10 @@ class DB(Enum):
78
79
  from .chroma.chroma import ChromaClient
79
80
  return ChromaClient
80
81
 
82
+ if self == DB.AWSOpenSearch:
83
+ from .aws_opensearch.aws_opensearch import AWSOpenSearch
84
+ return AWSOpenSearch
85
+
81
86
  @property
82
87
  def config_cls(self) -> Type[DBConfig]:
83
88
  """Import while in use"""
@@ -121,6 +126,10 @@ class DB(Enum):
121
126
  from .chroma.config import ChromaConfig
122
127
  return ChromaConfig
123
128
 
129
+ if self == DB.AWSOpenSearch:
130
+ from .aws_opensearch.config import AWSOpenSearchConfig
131
+ return AWSOpenSearchConfig
132
+
124
133
  def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseConfig]:
125
134
  if self == DB.Milvus:
126
135
  from .milvus.config import _milvus_case_config
@@ -150,6 +159,10 @@ class DB(Enum):
150
159
  from .pgvecto_rs.config import _pgvecto_rs_case_config
151
160
  return _pgvecto_rs_case_config.get(index_type)
152
161
 
162
+ if self == DB.AWSOpenSearch:
163
+ from .aws_opensearch.config import AWSOpenSearchIndexConfig
164
+ return AWSOpenSearchIndexConfig
165
+
153
166
  # DB.Pinecone, DB.Chroma, DB.Redis
154
167
  return EmptyDBCaseConfig
155
168
 
@@ -0,0 +1,159 @@
1
+ import logging
2
+ from contextlib import contextmanager
3
+ import time
4
+ from typing import Iterable, Type
5
+ from ..api import VectorDB, DBCaseConfig, DBConfig, IndexType
6
+ from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig
7
+ from opensearchpy import OpenSearch
8
+ from opensearchpy.helpers import bulk
9
+
10
+ log = logging.getLogger(__name__)
11
+
12
+
13
class AWSOpenSearch(VectorDB):
    """VectorDB client backed by an OpenSearch k-NN index.

    The constructor only prepares the index (optionally dropping and
    re-creating it). The client used for inserts and searches is opened by
    the ``init()`` context manager and is ``None`` outside of it.
    """

    # Bulk inserts are retried a bounded number of times so a persistent
    # failure (bad credentials, rejected mapping, ...) is reported to the
    # caller instead of retrying forever.
    _INSERT_MAX_ATTEMPTS = 10
    _INSERT_RETRY_INTERVAL_SEC = 10

    def __init__(
        self,
        dim: int,
        db_config: dict,
        db_case_config: AWSOpenSearchIndexConfig,
        index_name: str = "vdb_bench_index",  # must be lowercase
        id_col_name: str = "id",
        vector_col_name: str = "embedding",
        drop_old: bool = False,
        **kwargs,
    ):
        """Store the settings and, when ``drop_old`` is set, rebuild the index.

        Args:
            dim: dimension of the vectors stored in the index.
            db_config: keyword arguments passed verbatim to ``OpenSearch(...)``.
            db_case_config: index configuration; its ``index_param()`` is used
                as the k-NN ``method`` of the vector field.
            index_name: name of the index to use.
            id_col_name: name of the integer id field in the mapping.
            vector_col_name: name of the ``knn_vector`` field.
            drop_old: delete the index if it exists, then create it.
            **kwargs: accepted and ignored.
        """
        self.dim = dim
        self.db_config = db_config
        self.case_config = db_case_config
        self.index_name = index_name
        self.id_col_name = id_col_name
        self.category_col_names = [
            f"scalar-{categoryCount}" for categoryCount in [2, 5, 10, 100, 1000]
        ]
        self.vector_col_name = vector_col_name
        # Defined up front so the "should self.init() first" assertions fire
        # with their own message instead of an AttributeError.
        self.client = None

        # Never write credentials to the log: mask the auth entry only.
        loggable_config = {
            key: ("***" if key == "http_auth" else value)
            for key, value in self.db_config.items()
        }
        log.info(f"AWS_OpenSearch client config: {loggable_config}")
        client = OpenSearch(**self.db_config)
        if drop_old:
            log.info(f"AWS_OpenSearch client drop old index: {self.index_name}")
            is_existed = client.indices.exists(index=self.index_name)
            if is_existed:
                client.indices.delete(index=self.index_name)
            self._create_index(client)

    @classmethod
    def config_cls(cls) -> Type[AWSOpenSearchConfig]:
        """Return the connection-config class for this client."""
        return AWSOpenSearchConfig

    @classmethod
    def case_config_cls(
        cls, index_type: IndexType | None = None
    ) -> Type[AWSOpenSearchIndexConfig]:
        """Return the index-config class; ``index_type`` is not consulted."""
        return AWSOpenSearchIndexConfig

    def _create_index(self, client: OpenSearch):
        """Create the k-NN index with id, category and vector fields.

        Raises:
            Exception: whatever ``client.indices.create`` raised, after a
                warning has been logged.
        """
        settings = {
            "index": {
                "knn": True,
                # "number_of_shards": 5,
                # "refresh_interval": "600s",
            }
        }
        mappings = {
            "properties": {
                self.id_col_name: {"type": "integer"},
                **{
                    categoryCol: {"type": "keyword"}
                    for categoryCol in self.category_col_names
                },
                self.vector_col_name: {
                    "type": "knn_vector",
                    "dimension": self.dim,
                    "method": self.case_config.index_param(),
                },
            }
        }
        try:
            client.indices.create(
                index=self.index_name, body=dict(settings=settings, mappings=mappings)
            )
        except Exception as e:
            log.warning(f"Failed to create index: {self.index_name} error: {str(e)}")
            raise e from None

    @contextmanager
    def init(self) -> None:
        """Open an OpenSearch client for the duration of the ``with`` block."""
        self.client = OpenSearch(**self.db_config)
        try:
            yield
        finally:
            # Drop the client even when the body raised, so the instance never
            # keeps a stale connection object around.
            # self.client.transport.close()
            self.client = None

    def insert_embeddings(
        self,
        embeddings: Iterable[list[float]],
        metadata: list[int],
        **kwargs,
    ) -> tuple[int, Exception]:
        """Bulk-index the embeddings, using ``metadata`` values as document ids.

        The request is retried up to ``_INSERT_MAX_ATTEMPTS`` times, sleeping
        ``_INSERT_RETRY_INTERVAL_SEC`` seconds between attempts. Retrying the
        whole batch is safe because every action is an ``index`` operation
        with an explicit ``_id``.

        Args:
            embeddings: vectors to store, paired positionally with ``metadata``.
            metadata: document ids, one per embedding.
            **kwargs: accepted and ignored.

        Returns:
            ``(inserted_count, None)`` on success, or ``(0, error)`` with the
            last exception once all attempts failed.
        """
        assert self.client is not None, "should self.init() first"

        # Build the action/document pairs once; they are reused on retries.
        insert_data = []
        for doc_id, embedding in zip(metadata, embeddings):
            insert_data.append({"index": {"_index": self.index_name, "_id": doc_id}})
            insert_data.append({self.vector_col_name: embedding})
        doc_count = len(insert_data) // 2
        if doc_count == 0:
            # An empty bulk body is rejected by the server; nothing to do.
            return (0, None)

        last_error = None
        for attempt in range(1, self._INSERT_MAX_ATTEMPTS + 1):
            try:
                resp = self.client.bulk(insert_data)
                # A bulk call can succeed at HTTP level while individual
                # items were rejected; treat that as a failed attempt.
                if resp.get("errors"):
                    raise RuntimeError("bulk response reported failed items")
                log.info(f"AWS_OpenSearch adding documents: {len(resp['items'])}")
                resp = self.client.indices.stats(self.index_name)
                log.info(f"Total document count in index: {resp['_all']['primaries']['indexing']['index_total']}")
                return (doc_count, None)
            except Exception as e:
                last_error = e
                log.warning(f"Failed to insert data: {self.index_name} error: {str(e)}")
                if attempt < self._INSERT_MAX_ATTEMPTS:
                    time.sleep(self._INSERT_RETRY_INTERVAL_SEC)
        return (0, last_error)

    def search_embedding(
        self,
        query: list[float],
        k: int = 100,
        filters: dict | None = None,
    ) -> list[int]:
        """Get the ids of the k most similar embeddings to the query vector.

        Args:
            query(list[float]): query embedding to look up documents similar to.
            k(int): Number of most similar embeddings to return. Defaults to 100.
            filters(dict, optional): accepted for interface compatibility;
                it is not applied to the query.

        Returns:
            list[int]: document ids of the hits, in the order returned by
            OpenSearch.

        Raises:
            Exception: whatever ``client.search`` raised, after a warning has
                been logged.
        """
        assert self.client is not None, "should self.init() first"

        body = {
            "size": k,
            "query": {"knn": {self.vector_col_name: {"vector": query, "k": k}}},
        }
        try:
            resp = self.client.search(index=self.index_name, body=body)
            # Per-query diagnostics stay at debug level: this method sits on
            # the benchmark's hot path.
            log.debug(f'Search took: {resp["took"]}')
            log.debug(f'Search shards: {resp["_shards"]}')
            log.debug(f'Search hits total: {resp["hits"]["total"]}')
            return [int(d["_id"]) for d in resp["hits"]["hits"]]
        except Exception as e:
            log.warning(f"Failed to search: {self.index_name} error: {str(e)}")
            raise e from None

    def optimize(self):
        """optimize will be called between insertion and search in performance cases."""
        pass

    def ready_to_load(self):
        """ready_to_load will be called before load in load cases."""
        pass
@@ -0,0 +1,44 @@
1
+ from typing import Annotated, TypedDict, Unpack
2
+
3
+ import click
4
+ from pydantic import SecretStr
5
+
6
+ from ....cli.cli import (
7
+ CommonTypedDict,
8
+ HNSWFlavor2,
9
+ cli,
10
+ click_parameter_decorators_from_typed_dict,
11
+ run,
12
+ )
13
+ from .. import DB
14
+
15
+
16
# Connection options of the AWSOpenSearch CLI command. Each key carries the
# click option that populates it; only --host is required.
class AWSOpenSearchTypedDict(TypedDict):
    host: Annotated[
        str,
        click.option("--host", type=str, help="Db host", required=True),
    ]
    port: Annotated[
        int,
        click.option("--port", type=int, default=443, help="Db Port"),
    ]
    user: Annotated[
        str,
        click.option("--user", type=str, default="admin", help="Db User"),
    ]
    # No default is declared for --password.
    password: Annotated[
        str,
        click.option("--password", type=str, help="Db password"),
    ]
23
+
24
+
25
# Full option set of the command: the keys of CommonTypedDict,
# AWSOpenSearchTypedDict and HNSWFlavor2 combined; adds none of its own.
class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFlavor2):
    pass
27
+
28
+
29
# CLI entry point: builds the connection config from the parsed options and
# hands everything to run(). Documented with comments on purpose -- a
# docstring here would become the command's help text.
@cli.command()
@click_parameter_decorators_from_typed_dict(AWSOpenSearchHNSWTypedDict)
def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
    # Imported lazily, inside the command body.
    from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig

    connection_config = AWSOpenSearchConfig(
        host=parameters["host"],
        port=parameters["port"],
        user=parameters["user"],
        password=SecretStr(parameters["password"]),
    )
    # NOTE(review): the index config is built with defaults only; the HNSW
    # options inherited from HNSWFlavor2 look unused here -- confirm whether
    # they should be forwarded.
    index_config = AWSOpenSearchIndexConfig()

    run(
        db=DB.AWSOpenSearch,
        db_config=connection_config,
        db_case_config=index_config,
        **parameters,
    )
@@ -0,0 +1,58 @@
1
+ from enum import Enum
2
+ from pydantic import SecretStr, BaseModel
3
+
4
+ from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
5
+
6
+
7
# Connection settings for an OpenSearch endpoint; to_dict() converts them
# into the keyword arguments of the OpenSearch client constructor.
class AWSOpenSearchConfig(DBConfig, BaseModel):
    host: str = ""
    port: int = 443
    user: str = ""
    password: SecretStr = ""

    def to_dict(self) -> dict:
        """Return the client keyword arguments for this configuration.

        Returns:
            dict with the target host/port, basic-auth credentials, and fixed
            TLS/compression/timeout options.
        """
        # pydantic does not validate field defaults unless asked to, so an
        # unset password is still the plain "" default rather than a
        # SecretStr; unwrap only when there is something to unwrap.
        password = self.password
        if isinstance(password, SecretStr):
            password = password.get_secret_value()
        return {
            "hosts": [{'host': self.host, 'port': self.port}],
            "http_auth": (self.user, password),
            "use_ssl": True,
            "http_compress": True,
            "verify_certs": True,
            # NOTE(review): certificates are verified but hostname matching is
            # turned off -- confirm this is intentional.
            "ssl_assert_hostname": False,
            "ssl_show_warn": False,
            "timeout": 600,
        }
24
+
25
+
26
# k-NN engines selectable for the index; the member value is the string sent
# as "engine" in the index method definition.
class AWSOS_Engine(Enum):
    nmslib = "nmslib"
    faiss = "faiss"
    lucene = "Lucene"  # capitalised value kept exactly as declared
30
+
31
+
32
# HNSW index settings for the k-NN vector field: metric, engine and the two
# build parameters.
class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
    metric_type: MetricType = MetricType.L2
    engine: AWSOS_Engine = AWSOS_Engine.nmslib
    efConstruction: int = 360
    M: int = 30

    def parse_metric(self) -> str:
        """Translate ``metric_type`` into the k-NN ``space_type`` name.

        Anything other than COSINE or IP maps to ``"l2"``.
        """
        metric = self.metric_type
        if metric == MetricType.COSINE:
            return "cosinesimil"
        if metric == MetricType.IP:
            # only support faiss / nmslib, not for Lucene.
            return "innerproduct"
        return "l2"

    def index_param(self) -> dict:
        """Build the ``method`` definition used when creating the index."""
        hnsw_parameters = {
            "ef_construction": self.efConstruction,
            "m": self.M,
        }
        return {
            "name": "hnsw",
            "space_type": self.parse_metric(),
            "engine": self.engine.value,
            "parameters": hnsw_parameters,
        }

    def search_param(self) -> dict:
        """Return the search-time parameters; none are defined."""
        return dict()