vectordb-bench 0.0.18__tar.gz → 0.0.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/PKG-INFO +13 -28
  2. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/README.md +5 -8
  3. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/pyproject.toml +19 -18
  4. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/test_rate_runner.py +3 -3
  5. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/cases.py +1 -1
  6. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/__init__.py +26 -0
  7. vectordb_bench-0.0.19/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +27 -0
  8. vectordb_bench-0.0.19/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +19 -0
  9. vectordb_bench-0.0.19/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +304 -0
  10. vectordb_bench-0.0.19/vectordb_bench/backend/clients/aliyun_opensearch/config.py +48 -0
  11. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/alloydb/cli.py +1 -1
  12. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/api.py +3 -0
  13. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/milvus/cli.py +25 -1
  14. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/milvus/config.py +16 -2
  15. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/milvus/milvus.py +5 -7
  16. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/runner/rate_runner.py +32 -15
  17. vectordb_bench-0.0.19/vectordb_bench/backend/runner/read_write_runner.py +178 -0
  18. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/runner/serial_runner.py +8 -2
  19. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/runner/util.py +0 -16
  20. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/task_runner.py +4 -3
  21. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/utils.py +1 -0
  22. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/config/dbCaseConfigs.py +58 -0
  23. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/PKG-INFO +13 -28
  24. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/SOURCES.txt +4 -0
  25. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/requires.txt +7 -23
  26. vectordb_bench-0.0.18/vectordb_bench/backend/runner/read_write_runner.py +0 -112
  27. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/.devcontainer/Dockerfile +0 -0
  28. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/.devcontainer/devcontainer.json +0 -0
  29. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/.env.example +0 -0
  30. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/.github/workflows/publish_package_on_release.yml +0 -0
  31. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/.github/workflows/pull_request.yml +0 -0
  32. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/.gitignore +0 -0
  33. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/.ruff.toml +0 -0
  34. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/Dockerfile +0 -0
  35. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/LICENSE +0 -0
  36. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/Makefile +0 -0
  37. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/OWNERS +0 -0
  38. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/fig/custom_case_run_test.png +0 -0
  39. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/fig/custom_dataset.png +0 -0
  40. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/install/requirements_py3.11.txt +0 -0
  41. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/install.py +0 -0
  42. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/setup.cfg +0 -0
  43. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/conftest.py +0 -0
  44. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/pytest.ini +0 -0
  45. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/test_bench_runner.py +0 -0
  46. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/test_chroma.py +0 -0
  47. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/test_data_source.py +0 -0
  48. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/test_dataset.py +0 -0
  49. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/test_elasticsearch_cloud.py +0 -0
  50. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/test_models.py +0 -0
  51. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/test_redis.py +0 -0
  52. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/test_utils.py +0 -0
  53. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/tests/ut_cases.py +0 -0
  54. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/__init__.py +0 -0
  55. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/__main__.py +0 -0
  56. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/__init__.py +0 -0
  57. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/assembler.py +0 -0
  58. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/alloydb/alloydb.py +0 -0
  59. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/alloydb/config.py +0 -0
  60. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +0 -0
  61. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/aws_opensearch/cli.py +0 -0
  62. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/aws_opensearch/config.py +0 -0
  63. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -0
  64. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/chroma/chroma.py +0 -0
  65. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  66. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
  67. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
  68. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/memorydb/cli.py +0 -0
  69. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
  70. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/memorydb/memorydb.py +0 -0
  71. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgdiskann/cli.py +0 -0
  72. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgdiskann/config.py +0 -0
  73. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +0 -0
  74. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
  75. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
  76. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
  77. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvector/cli.py +0 -0
  78. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvector/config.py +0 -0
  79. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -0
  80. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvectorscale/cli.py +0 -0
  81. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
  82. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +0 -0
  83. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
  84. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
  85. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -0
  86. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +0 -0
  87. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/redis/cli.py +0 -0
  88. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/redis/config.py +0 -0
  89. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/redis/redis.py +0 -0
  90. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/test/cli.py +0 -0
  91. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/test/config.py +0 -0
  92. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/test/test.py +0 -0
  93. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -0
  94. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
  95. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
  96. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
  97. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
  98. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  99. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/data_source.py +0 -0
  100. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/dataset.py +0 -0
  101. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/result_collector.py +0 -0
  102. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/runner/__init__.py +0 -0
  103. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/backend/runner/mp_runner.py +0 -0
  104. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/base.py +0 -0
  105. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/cli/__init__.py +0 -0
  106. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/cli/cli.py +0 -0
  107. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/cli/vectordbbench.py +0 -0
  108. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/config-files/sample_config.yml +0 -0
  109. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/custom/custom_case.json +0 -0
  110. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
  111. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/data.py +0 -0
  112. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
  113. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
  114. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  115. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
  116. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
  117. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
  118. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
  119. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/concurrent/charts.py +0 -0
  120. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -0
  121. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/custom/displaypPrams.py +0 -0
  122. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/custom/getCustomConfig.py +0 -0
  123. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
  124. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  125. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
  126. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -0
  127. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
  128. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
  129. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
  130. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  131. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
  132. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/submitTask.py +0 -0
  133. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/tables/data.py +0 -0
  134. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/config/dbPrices.py +0 -0
  135. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/config/styles.py +0 -0
  136. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/pages/concurrent.py +0 -0
  137. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/pages/custom.py +0 -0
  138. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/pages/quries_per_dollar.py +0 -0
  139. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/pages/run_test.py +0 -0
  140. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/pages/tables.py +0 -0
  141. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/utils.py +0 -0
  142. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/frontend/vdb_benchmark.py +0 -0
  143. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/interface.py +0 -0
  144. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/log_util.py +0 -0
  145. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/metric.py +0 -0
  146. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/models.py +0 -0
  147. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  148. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  149. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
  150. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
  151. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
  152. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
  153. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  154. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  155. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  156. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  157. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  158. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  159. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
  160. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
  161. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -0
  162. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/dbPrices.json +0 -0
  163. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/getLeaderboardData.py +0 -0
  164. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench/results/leaderboard.json +0 -0
  165. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  166. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/entry_points.txt +0 -0
  167. {vectordb_bench-0.0.18 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vectordb-bench
3
- Version: 0.0.18
3
+ Version: 0.0.19
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -35,15 +35,16 @@ Requires-Dist: qdrant-client; extra == "all"
35
35
  Requires-Dist: pinecone-client; extra == "all"
36
36
  Requires-Dist: weaviate-client; extra == "all"
37
37
  Requires-Dist: elasticsearch; extra == "all"
38
- Requires-Dist: pgvector; extra == "all"
39
- Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "all"
40
38
  Requires-Dist: sqlalchemy; extra == "all"
41
39
  Requires-Dist: redis; extra == "all"
42
40
  Requires-Dist: chromadb; extra == "all"
41
+ Requires-Dist: pgvector; extra == "all"
43
42
  Requires-Dist: psycopg; extra == "all"
44
43
  Requires-Dist: psycopg-binary; extra == "all"
45
- Requires-Dist: opensearch-dsl==2.1.0; extra == "all"
46
- Requires-Dist: opensearch-py==2.6.0; extra == "all"
44
+ Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "all"
45
+ Requires-Dist: opensearch-dsl; extra == "all"
46
+ Requires-Dist: opensearch-py; extra == "all"
47
+ Requires-Dist: memorydb; extra == "all"
47
48
  Provides-Extra: qdrant
48
49
  Requires-Dist: qdrant-client; extra == "qdrant"
49
50
  Provides-Extra: pinecone
@@ -56,18 +57,6 @@ Provides-Extra: pgvector
56
57
  Requires-Dist: psycopg; extra == "pgvector"
57
58
  Requires-Dist: psycopg-binary; extra == "pgvector"
58
59
  Requires-Dist: pgvector; extra == "pgvector"
59
- Provides-Extra: pgvectorscale
60
- Requires-Dist: psycopg; extra == "pgvectorscale"
61
- Requires-Dist: psycopg-binary; extra == "pgvectorscale"
62
- Requires-Dist: pgvector; extra == "pgvectorscale"
63
- Provides-Extra: pgdiskann
64
- Requires-Dist: psycopg; extra == "pgdiskann"
65
- Requires-Dist: psycopg-binary; extra == "pgdiskann"
66
- Requires-Dist: pgvector; extra == "pgdiskann"
67
- Provides-Extra: alloydb
68
- Requires-Dist: psycopg; extra == "alloydb"
69
- Requires-Dist: psycopg-binary; extra == "alloydb"
70
- Requires-Dist: pgvector; extra == "alloydb"
71
60
  Provides-Extra: pgvecto-rs
72
61
  Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "pgvecto-rs"
73
62
  Provides-Extra: redis
@@ -76,9 +65,8 @@ Provides-Extra: memorydb
76
65
  Requires-Dist: memorydb; extra == "memorydb"
77
66
  Provides-Extra: chromadb
78
67
  Requires-Dist: chromadb; extra == "chromadb"
79
- Provides-Extra: awsopensearch
80
- Requires-Dist: awsopensearch; extra == "awsopensearch"
81
- Provides-Extra: zilliz-cloud
68
+ Provides-Extra: opensearch
69
+ Requires-Dist: opensearch-py; extra == "opensearch"
82
70
 
83
71
  # VectorDBBench: A Benchmark Tool for VectorDB
84
72
 
@@ -111,21 +99,18 @@ All the database client supported
111
99
 
112
100
  | Optional database client | install command |
113
101
  |--------------------------|---------------------------------------------|
114
- | pymilvus(*default*) | `pip install vectordb-bench` |
115
- | all | `pip install vectordb-bench[all]` |
102
+ | pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
103
+ | all (*client requirements might conflict with each other*) | `pip install vectordb-bench[all]` |
116
104
  | qdrant | `pip install vectordb-bench[qdrant]` |
117
105
  | pinecone | `pip install vectordb-bench[pinecone]` |
118
106
  | weaviate | `pip install vectordb-bench[weaviate]` |
119
- | elastic | `pip install vectordb-bench[elastic]` |
120
- | pgvector | `pip install vectordb-bench[pgvector]` |
107
+ | elastic, aliyun_elasticsearch| `pip install vectordb-bench[elastic]` |
108
+ | pgvector, pgvectorscale, pgdiskann, alloydb | `pip install vectordb-bench[pgvector]` |
121
109
  | pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
122
- | pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
123
- | pgdiskann | `pip install vectordb-bench[pgdiskann]` |
124
110
  | redis | `pip install vectordb-bench[redis]` |
125
111
  | memorydb | `pip install vectordb-bench[memorydb]` |
126
112
  | chromadb | `pip install vectordb-bench[chromadb]` |
127
- | awsopensearch | `pip install vectordb-bench[awsopensearch]` |
128
- | alloydb | `pip install vectordb-bench[alloydb]` |
113
+ | awsopensearch | `pip install vectordb-bench[opensearch]` |
129
114
 
130
115
  ### Run
131
116
 
@@ -29,21 +29,18 @@ All the database client supported
29
29
 
30
30
  | Optional database client | install command |
31
31
  |--------------------------|---------------------------------------------|
32
- | pymilvus(*default*) | `pip install vectordb-bench` |
33
- | all | `pip install vectordb-bench[all]` |
32
+ | pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
33
+ | all (*client requirements might conflict with each other*) | `pip install vectordb-bench[all]` |
34
34
  | qdrant | `pip install vectordb-bench[qdrant]` |
35
35
  | pinecone | `pip install vectordb-bench[pinecone]` |
36
36
  | weaviate | `pip install vectordb-bench[weaviate]` |
37
- | elastic | `pip install vectordb-bench[elastic]` |
38
- | pgvector | `pip install vectordb-bench[pgvector]` |
37
+ | elastic, aliyun_elasticsearch| `pip install vectordb-bench[elastic]` |
38
+ | pgvector, pgvectorscale, pgdiskann, alloydb | `pip install vectordb-bench[pgvector]` |
39
39
  | pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
40
- | pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
41
- | pgdiskann | `pip install vectordb-bench[pgdiskann]` |
42
40
  | redis | `pip install vectordb-bench[redis]` |
43
41
  | memorydb | `pip install vectordb-bench[memorydb]` |
44
42
  | chromadb | `pip install vectordb-bench[chromadb]` |
45
- | awsopensearch | `pip install vectordb-bench[awsopensearch]` |
46
- | alloydb | `pip install vectordb-bench[alloydb]` |
43
+ | awsopensearch | `pip install vectordb-bench[opensearch]` |
47
44
 
48
45
  ### Run
49
46
 
@@ -55,31 +55,32 @@ all = [
55
55
  "pinecone-client",
56
56
  "weaviate-client",
57
57
  "elasticsearch",
58
- "pgvector",
59
- "pgvecto_rs[psycopg3]>=0.2.2",
60
58
  "sqlalchemy",
61
59
  "redis",
62
60
  "chromadb",
61
+ "pgvector",
63
62
  "psycopg",
64
63
  "psycopg-binary",
65
- "opensearch-dsl==2.1.0",
66
- "opensearch-py==2.6.0",
64
+ "pgvecto_rs[psycopg3]>=0.2.2",
65
+ "opensearch-dsl",
66
+ "opensearch-py",
67
+ "memorydb",
67
68
  ]
68
69
 
69
- qdrant = [ "qdrant-client" ]
70
- pinecone = [ "pinecone-client" ]
71
- weaviate = [ "weaviate-client" ]
72
- elastic = [ "elasticsearch" ]
73
- pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
74
- pgvectorscale = [ "psycopg", "psycopg-binary", "pgvector" ]
75
- pgdiskann = [ "psycopg", "psycopg-binary", "pgvector" ]
76
- alloydb = [ "psycopg", "psycopg-binary", "pgvector"]
77
- pgvecto_rs = [ "pgvecto_rs[psycopg3]>=0.2.2" ]
78
- redis = [ "redis" ]
79
- memorydb = [ "memorydb" ]
80
- chromadb = [ "chromadb" ]
81
- awsopensearch = [ "awsopensearch" ]
82
- zilliz_cloud = []
70
+ qdrant = [ "qdrant-client" ]
71
+ pinecone = [ "pinecone-client" ]
72
+ weaviate = [ "weaviate-client" ]
73
+ elastic = [ "elasticsearch" ]
74
+ # For elastic and aliyun_elasticsearch
75
+
76
+ pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
77
+ # for pgvector, pgvectorscale, pgdiskann, and alloydb
78
+
79
+ pgvecto_rs = [ "pgvecto_rs[psycopg3]>=0.2.2" ]
80
+ redis = [ "redis" ]
81
+ memorydb = [ "memorydb" ]
82
+ chromadb = [ "chromadb" ]
83
+ opensearch = [ "opensearch-py" ]
83
84
 
84
85
  [project.urls]
85
86
  "repository" = "https://github.com/zilliztech/VectorDBBench"
@@ -52,9 +52,9 @@ def test_read_write_runner(db, insert_rate, conc: list, search_stage: Iterable[f
52
52
 
53
53
  def get_db(db: str, config: dict) -> VectorDB:
54
54
  if db == DB.Milvus.name:
55
- return DB.Milvus.init_cls(dim=768, db_config=config, db_case_config=FLATConfig(metric_type="COSINE"), drop_old=True, pre_load=True)
55
+ return DB.Milvus.init_cls(dim=768, db_config=config, db_case_config=FLATConfig(metric_type="COSINE"), drop_old=True)
56
56
  elif db == DB.ZillizCloud.name:
57
- return DB.ZillizCloud.init_cls(dim=768, db_config=config, db_case_config=AutoIndexConfig(metric_type="COSINE"), drop_old=True, pre_load=True)
57
+ return DB.ZillizCloud.init_cls(dim=768, db_config=config, db_case_config=AutoIndexConfig(metric_type="COSINE"), drop_old=True)
58
58
  else:
59
59
  raise ValueError(f"unknown db: {db}")
60
60
 
@@ -76,7 +76,7 @@ if __name__ == "__main__":
76
76
  }
77
77
 
78
78
  conc = (1, 15, 50)
79
- search_stage = (0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
79
+ search_stage = (0.5, 0.6, 0.7, 0.8, 0.9)
80
80
 
81
81
  db = get_db(flags.db, config)
82
82
  test_read_write_runner(
@@ -289,7 +289,7 @@ class Performance1536D50K(PerformanceCase):
289
289
  description: str = """This case tests the search performance of a vector database with a medium 50K dataset (<b>OpenAI 50K vectors</b>, 1536 dimensions), at varying parallel levels.
290
290
  Results will show index building time, recall, and maximum QPS."""
291
291
  load_timeout: float | int = 3600
292
- optimize_timeout: float | int | None = 15 * 60
292
+ optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_DEFAULT
293
293
 
294
294
 
295
295
  def metric_type_map(s: str) -> MetricType:
@@ -37,7 +37,9 @@ class DB(Enum):
37
37
  MemoryDB = "MemoryDB"
38
38
  Chroma = "Chroma"
39
39
  AWSOpenSearch = "OpenSearch"
40
+ AliyunElasticsearch = "AliyunElasticsearch"
40
41
  Test = "test"
42
+ AliyunOpenSearch = "AliyunOpenSearch"
41
43
 
42
44
 
43
45
  @property
@@ -103,6 +105,14 @@ class DB(Enum):
103
105
  from .alloydb.alloydb import AlloyDB
104
106
  return AlloyDB
105
107
 
108
+ if self == DB.AliyunElasticsearch:
109
+ from .aliyun_elasticsearch.aliyun_elasticsearch import AliyunElasticsearch
110
+ return AliyunElasticsearch
111
+
112
+ if self == DB.AliyunOpenSearch:
113
+ from .aliyun_opensearch.aliyun_opensearch import AliyunOpenSearch
114
+ return AliyunOpenSearch
115
+
106
116
  @property
107
117
  def config_cls(self) -> Type[DBConfig]:
108
118
  """Import while in use"""
@@ -166,6 +176,14 @@ class DB(Enum):
166
176
  from .alloydb.config import AlloyDBConfig
167
177
  return AlloyDBConfig
168
178
 
179
+ if self == DB.AliyunElasticsearch:
180
+ from .aliyun_elasticsearch.config import AliyunElasticsearchConfig
181
+ return AliyunElasticsearchConfig
182
+
183
+ if self == DB.AliyunOpenSearch:
184
+ from .aliyun_opensearch.config import AliyunOpenSearchConfig
185
+ return AliyunOpenSearchConfig
186
+
169
187
  def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseConfig]:
170
188
  if self == DB.Milvus:
171
189
  from .milvus.config import _milvus_case_config
@@ -211,6 +229,14 @@ class DB(Enum):
211
229
  from .alloydb.config import _alloydb_case_config
212
230
  return _alloydb_case_config.get(index_type)
213
231
 
232
+ if self == DB.AliyunElasticsearch:
233
+ from .elastic_cloud.config import ElasticCloudIndexConfig
234
+ return ElasticCloudIndexConfig
235
+
236
+ if self == DB.AliyunOpenSearch:
237
+ from .aliyun_opensearch.config import AliyunOpenSearchIndexConfig
238
+ return AliyunOpenSearchIndexConfig
239
+
214
240
  # DB.Pinecone, DB.Chroma, DB.Redis
215
241
  return EmptyDBCaseConfig
216
242
 
@@ -0,0 +1,27 @@
1
+ from ..elastic_cloud.elastic_cloud import ElasticCloud
2
+ from ..elastic_cloud.config import ElasticCloudIndexConfig
3
+
4
+
5
+ class AliyunElasticsearch(ElasticCloud):
6
+ def __init__(
7
+ self,
8
+ dim: int,
9
+ db_config: dict,
10
+ db_case_config: ElasticCloudIndexConfig,
11
+ indice: str = "vdb_bench_indice", # must be lowercase
12
+ id_col_name: str = "id",
13
+ vector_col_name: str = "vector",
14
+ drop_old: bool = False,
15
+ **kwargs,
16
+ ):
17
+ super().__init__(
18
+ dim=dim,
19
+ db_config=db_config,
20
+ db_case_config=db_case_config,
21
+ indice=indice,
22
+ id_col_name=id_col_name,
23
+ vector_col_name=vector_col_name,
24
+ drop_old=drop_old,
25
+ **kwargs,
26
+ )
27
+
@@ -0,0 +1,19 @@
1
+ from enum import Enum
2
+ from pydantic import SecretStr, BaseModel
3
+
4
+ from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
5
+
6
+
7
+ class AliyunElasticsearchConfig(DBConfig, BaseModel):
8
+ #: Protocol in use to connect to the node
9
+ scheme: str = "http"
10
+ host: str = ""
11
+ port: int = 9200
12
+ user: str = "elastic"
13
+ password: SecretStr
14
+
15
+ def to_dict(self) -> dict:
16
+ return {
17
+ "hosts": [{'scheme': self.scheme, 'host': self.host, 'port': self.port}],
18
+ "basic_auth": (self.user, self.password.get_secret_value()),
19
+ }
@@ -0,0 +1,304 @@
1
+ import json
2
+ import logging
3
+ from contextlib import contextmanager
4
+ import time
5
+
6
+ from alibabacloud_ha3engine_vector.models import QueryRequest
7
+
8
+ from ..api import VectorDB, MetricType
9
+ from .config import AliyunOpenSearchIndexConfig
10
+
11
+ from alibabacloud_searchengine20211025.client import Client as searchengineClient
12
+ from alibabacloud_searchengine20211025 import models as searchengine_models
13
+ from alibabacloud_tea_openapi import models as open_api_models
14
+ from alibabacloud_ha3engine_vector import models, client
15
+
16
+ log = logging.getLogger(__name__)
17
+
18
+ ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024 # 2MB
19
+ ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH = 100
20
+
21
class AliyunOpenSearch(VectorDB):
    """VectorDB client for Aliyun OpenSearch.

    Two SDK clients are used: a control client (``searchengineClient``),
    created in ``__init__`` and used for table management, and a data client
    (``alibabacloud_ha3engine_vector``), created inside ``init()`` and used
    for pushing documents, querying and reading stats.
    """

    # Keys of ``db_config`` whose values must never be written to the log.
    _SECRET_CONFIG_KEYS = ("password", "sk")

    def __init__(
        self,
        dim: int,
        db_config: dict,
        db_case_config: AliyunOpenSearchIndexConfig,
        collection_name: str = "VectorDBBenchCollection",
        drop_old: bool = False,
        **kwargs,
    ):
        """Create the control client and, when ``drop_old`` is set, (re)build the table.

        Args:
            dim: Vector dimension.
            db_config: Dict with the keys ``host``, ``user``, ``password``,
                ``ak``, ``sk`` and ``control_host``.
            db_case_config: Index parameters (M, efConstruction, ef_search, metric).
            collection_name: Name of the table to operate on.
            drop_old: When true, modify the existing table or create it if it
                is missing. When false, no table management is performed.
            **kwargs: Accepted and ignored.
        """
        self.control_client = None
        # Data client; only set while inside the ``init()`` context manager.
        self.client = None
        self.dim = dim
        self.db_config = db_config
        self.case_config = db_case_config
        self.collection_name = collection_name
        # First dot-separated label of the host, with any URL scheme removed.
        self.instance_id = db_config["host"].split(".")[0].replace("http://", "").replace("https://", "")

        self._primary_field = "id"
        self._scalar_field = "int_id"
        self._vector_field = "vector"
        self._index_name = "vector_idx"

        # NOTE(review): ``dim * 25`` is presumably an estimate of the serialized
        # size of one vector in bytes -- confirm. Clamped to at least 1 so a
        # very large ``dim`` cannot produce a zero step in ``insert_embeddings``.
        self.batch_size = max(
            1,
            int(min(ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH / (dim * 25), ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH)),
        )

        # db_config carries the password and secret key in plain text; mask
        # them before logging.
        loggable_config = {
            key: ("******" if key in self._SECRET_CONFIG_KEYS else value)
            for key, value in self.db_config.items()
        }
        log.info(f"Aliyun_OpenSearch client config: {loggable_config}")
        control_config = open_api_models.Config(
            access_key_id=self.db_config["ak"],
            access_key_secret=self.db_config["sk"],
            endpoint=self.db_config["control_host"],
        )
        self.control_client = searchengineClient(control_config)

        if drop_old:
            log.info(f"aliyun_OpenSearch client drop old index: {self.collection_name}")
            if self._index_exists(self.control_client):
                self._modify_index(self.control_client)
            else:
                self._create_index(self.control_client)

    def _field_schema(self) -> dict:
        """Return the table schema shared by the create and modify requests."""
        return {
            self._primary_field: "INT64",
            self._vector_field: "MULTI_FLOAT",
            self._scalar_field: "INT64",
        }

    def _build_vector_index(self):
        """Build the HNSW vector-index definition shared by create and modify requests."""
        vector_index = searchengine_models.ModifyTableRequestVectorIndex()
        vector_index.index_name = self._index_name
        vector_index.dimension = self.dim
        vector_index.distance_type = self.case_config.distance_type()
        vector_index.vector_field = self._vector_field
        vector_index.vector_index_type = "HNSW"

        advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
        # Compact separators keep the serialized text free of extra whitespace.
        advance_params.build_index_params = json.dumps(
            {
                "proxima.hnsw.builder.max_neighbor_count": self.case_config.M,
                "proxima.hnsw.builder.efconstruction": self.case_config.efConstruction,
                "proxima.hnsw.builder.enable_adsampling": True,
                "proxima.hnsw.builder.slack_pruning_factor": 1.1,
                "proxima.hnsw.builder.thread_count": 16,
            },
            separators=(",", ":"),
        )
        advance_params.search_index_params = json.dumps(
            {
                "proxima.hnsw.searcher.ef": 400,
                "proxima.hnsw.searcher.dynamic_termination.prob_threshold": 0.7,
            },
            separators=(",", ":"),
        )
        vector_index.advance_params = advance_params
        return vector_index

    @staticmethod
    def _log_sdk_error(action: str, error: Exception) -> None:
        """Log a failed control call without assuming SDK-specific attributes.

        ``message`` and ``data`` are read with ``getattr`` because an arbitrary
        exception (for example a network error) may not define them; reading
        them directly would raise ``AttributeError`` and hide the real failure.
        """
        message = getattr(error, "message", None)
        if message is not None:
            log.warning(message)
        data = getattr(error, "data", None)
        if isinstance(data, dict):
            log.warning(data.get("Recommend"))
        log.warning(f"Failed to {action}: error: {str(error)}")

    def _create_index(self, client: searchengineClient):
        """Create the table with its vector index, then wait until it is in use.

        Raises:
            Exception: Whatever ``client.create_table`` raised, re-raised after logging.
        """
        create_table_request = searchengine_models.CreateTableRequest()
        create_table_request.name = self.collection_name
        create_table_request.primary_key = self._primary_field
        create_table_request.partition_count = 1
        create_table_request.field_schema = self._field_schema()
        create_table_request.vector_index = [self._build_vector_index()]

        try:
            response = client.create_table(self.instance_id, create_table_request)
            log.info(f"create table success: {response.body}")
        except Exception as error:
            self._log_sdk_error("create index", error)
            raise

        # check if index create success
        self._active_index(client)

    # check if index create success
    def _active_index(self, client: searchengineClient) -> None:
        """Poll ``get_table`` every 10 seconds until the table status is ``IN_USE``.

        There is no upper bound on the number of polls.
        """
        retry_times = 0
        while True:
            time.sleep(10)
            log.info(f"begin to {retry_times} times get table")
            retry_times += 1
            response = client.get_table(self.instance_id, self.collection_name)
            if response.body.result.status == 'IN_USE':
                log.info(f"{self.collection_name} table begin to use.")
                return

    def _index_exists(self, client: searchengineClient) -> bool:
        """Return True when ``get_table`` succeeds, False when it raises.

        NOTE(review): any exception is treated as "table does not exist",
        including failures unrelated to existence -- confirm this is intended.
        """
        try:
            client.get_table(self.instance_id, self.collection_name)
            return True
        except Exception as error:
            log.info('get table from searchengine error')
            # Not every exception defines ``message``; fall back to the exception itself.
            log.info(getattr(error, "message", error))
            return False

    # check if index build success, Insert the embeddings to the vector database after index build success
    def _index_build_success(self, client: searchengineClient) -> None:
        """Wait for this table's ``datasource_flow_fsm`` task to report success.

        Sleeps 50 seconds, then every 10 seconds lists the tasks of the last
        hour and inspects the first one of type ``datasource_flow_fsm`` whose
        ``fsmId`` contains the collection name. Returns when that task's status
        is ``success`` or when no such task is listed.
        """
        log.info("begin to check if table build success.")
        time.sleep(50)

        retry_times = 0
        while True:
            time.sleep(10)
            log.info(f"begin to {retry_times} times get table fsm")
            retry_times += 1
            request = searchengine_models.ListTasksRequest()
            # Window: the last hour, expressed in milliseconds.
            request.start = (int(time.time()) - 3600) * 1000
            request.end = int(time.time()) * 1000
            response = client.list_tasks(self.instance_id, request)
            fsms = response.body.result
            cur_fsm = None
            for fsm in fsms:
                if fsm["type"] != "datasource_flow_fsm":
                    continue
                if self.collection_name not in fsm["fsmId"]:
                    continue
                cur_fsm = fsm
                break
            if cur_fsm is None:
                log.info("no build index fsm")
                return
            if "success" == cur_fsm["status"]:
                return

    def _modify_index(self, client: searchengineClient) -> None:
        """Re-submit the table definition for an existing table and wait for the rebuild.

        Waits for the table to be ``IN_USE`` first, sends the modify request,
        then waits for the follow-up build task (the original comment describes
        it as "modify index & delete data").

        Raises:
            Exception: Whatever ``client.modify_table`` raised, re-raised after logging.
        """
        # check if index create success
        self._active_index(client)

        modify_table_request = searchengine_models.ModifyTableRequest()
        modify_table_request.partition_count = 1
        modify_table_request.primary_key = self._primary_field
        modify_table_request.field_schema = self._field_schema()
        modify_table_request.vector_index = [self._build_vector_index()]

        try:
            response = client.modify_table(self.instance_id, self.collection_name, modify_table_request)
            log.info(f"modify table success: {response.body}")
        except Exception as error:
            self._log_sdk_error("modify index", error)
            raise

        # check if modify index & delete data fsm success
        self._index_build_success(client)

    # get collection records total count
    def _get_total_count(self):
        """Return ``result.totalDocCount`` from the stats call, or 0 if unavailable.

        Any exception is logged and reported as a count of 0 (best effort).
        """
        try:
            response = self.client.stats(self.collection_name)
            body = json.loads(response.body)
            log.info(f"stats info: {response.body}")

            if "result" in body and "totalDocCount" in body.get("result"):
                return body.get("result").get("totalDocCount")
            return 0
        except Exception as e:
            log.warning(f"Error querying index: {e}")
            return 0

    @contextmanager
    def init(self) -> None:
        """connect to aliyun opensearch

        Context manager: the data client exists only inside the ``with`` body
        and is reset to ``None`` afterwards, also when the body raises.
        """
        config = models.Config(
            endpoint=self.db_config["host"],
            protocol="http",
            access_user_name=self.db_config["user"],
            access_pass_word=self.db_config["password"],
        )

        self.client = client.Client(config)
        try:
            yield
        finally:
            # Keep the attribute (as None) so the ``assert self.client is not
            # None`` guards fail with their intended message after exit.
            self.client = None

    def insert_embeddings(
        self,
        embeddings: list[list[float]],
        metadata: list[int],
        **kwargs,
    ) -> tuple[int, Exception | None]:
        """Insert the embeddings to the opensearch.

        Documents are pushed in batches of ``self.batch_size``. Each metadata
        value is written to both the primary-key field and the scalar field.

        Returns:
            ``(inserted_count, None)`` on success, or ``(inserted_count, error)``
            where ``inserted_count`` covers the batches pushed before the failure.
        """
        assert self.client is not None, "should self.init() first"
        assert len(embeddings) == len(metadata)
        insert_count = 0

        try:
            for batch_start_offset in range(0, len(embeddings), self.batch_size):
                batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
                documents = [
                    {
                        "fields": {
                            self._primary_field: doc_id,
                            self._vector_field: vector,
                            self._scalar_field: doc_id,
                            "ops_build_channel": "inc",
                        },
                        "cmd": "add",
                    }
                    for doc_id, vector in zip(
                        metadata[batch_start_offset:batch_end_offset],
                        embeddings[batch_start_offset:batch_end_offset],
                    )
                ]

                push_request = models.PushDocumentsRequest({}, documents)
                self.client.push_documents(self.collection_name, self._primary_field, push_request)
                insert_count += batch_end_offset - batch_start_offset
        except Exception as e:
            log.warning(f"Failed to insert data: {e}")
            return (insert_count, e)
        return (insert_count, None)

    def search_embedding(
        self,
        query: list[float],
        k: int = 100,
        filters: dict | None = None,
    ) -> list[int]:
        """Return the ``id`` of each hit for ``query``, at most ``k`` requested.

        When ``filters`` is given, the filter expression is the scalar field
        name followed by ``filters["metadata"]``.
        # presumably ``filters["metadata"]`` is an operator-plus-value string; verify against caller

        Raises:
            Exception: Whatever the query call raised, re-raised after logging.
        """
        assert self.client is not None, "should self.init() first"
        search_params = json.dumps(
            {"proxima.hnsw.searcher.ef": self.case_config.ef_search},
            separators=(",", ":"),
        )

        os_filter = f"{self._scalar_field} {filters.get('metadata')}" if filters else ""

        try:
            request = QueryRequest(
                table_name=self.collection_name,
                vector=query,
                top_k=k,
                search_params=search_params,
                filter=os_filter,
            )
            result = self.client.query(request)
        except Exception as e:
            log.warning(f"Error querying index: {e}")
            raise
        res = json.loads(result.body)
        return [one_res["id"] for one_res in res["result"]]

    def need_normalize_cosine(self) -> bool:
        """Whether this database needs the dataset normalized to support COSINE."""
        if self.case_config.metric_type == MetricType.COSINE:
            log.info("cosine dataset need normalize.")
            return True

        return False

    def optimize(self):
        """No-op; waiting for data visibility is done in ``optimize_with_size``."""
        pass

    def optimize_with_size(self, data_size: int):
        """Poll the document count every 10 seconds until it reaches ``data_size``.

        There is no upper bound on the number of polls.
        """
        log.info(f"optimize count: {data_size}")
        retry_times = 0
        while True:
            time.sleep(10)
            log.info(f"begin to {retry_times} times get optimize table")
            retry_times += 1
            total_count = self._get_total_count()
            # check if the data is inserted; ``>=`` so a count that overshoots
            # ``data_size`` cannot keep this loop spinning forever.
            if total_count >= data_size:
                log.info("optimize table finish.")
                return

    def ready_to_load(self):
        """ready_to_load will be called before load in load cases."""
        pass
@@ -0,0 +1,48 @@
1
+ import logging
2
+ from enum import Enum
3
+ from pydantic import SecretStr, BaseModel
4
+
5
+ from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
6
+
7
+ log = logging.getLogger(__name__)
8
+
9
+
10
class AliyunOpenSearchConfig(DBConfig, BaseModel):
    """Connection settings for Aliyun OpenSearch."""

    # Settings read by the data client.
    host: str = ""
    user: str = ""
    # Defaults are real SecretStr instances: pydantic does not validate field
    # defaults unless asked to, so a plain "" default would stay a ``str`` and
    # ``to_dict()`` would fail on ``.get_secret_value()``.
    password: SecretStr = SecretStr("")

    # Settings read by the control client.
    ak: str = ""
    sk: SecretStr = SecretStr("")
    control_host: str = "searchengine.cn-hangzhou.aliyuncs.com"

    def to_dict(self) -> dict:
        """Return the settings as a plain dict.

        ``password`` and ``sk`` are unwrapped from ``SecretStr``, so the
        returned dict carries them in plain text.
        """
        return {
            "host": self.host,
            "user": self.user,
            "password": self.password.get_secret_value(),
            "ak": self.ak,
            "sk": self.sk.get_secret_value(),
            "control_host": self.control_host,
        }
28
+
29
class AliyunOpenSearchIndexConfig(BaseModel, DBCaseConfig):
    """HNSW case parameters for Aliyun OpenSearch."""

    metric_type: MetricType = MetricType.L2
    efConstruction: int = 500
    M: int = 100
    ef_search: int = 40

    def distance_type(self) -> str:
        """Translate ``metric_type`` into the distance name sent to the service.

        IP and COSINE both map to "InnerProduct"; L2 and any other value map
        to "SquaredEuclidean".
        """
        # NOTE(review): COSINE -> InnerProduct presumably relies on the vectors
        # being normalized before insertion -- confirm against the client.
        uses_inner_product = self.metric_type in (MetricType.IP, MetricType.COSINE)
        return "InnerProduct" if uses_inner_product else "SquaredEuclidean"

    def index_param(self) -> dict:
        """Return an empty dict; no index parameters are exposed through this hook."""
        return {}

    def search_param(self) -> dict:
        """Return an empty dict; no search parameters are exposed through this hook."""
        return {}
@@ -106,7 +106,7 @@ class AlloyDBScaNNTypedDict(AlloyDBTypedDict):
106
106
  int,
107
107
  click.option(
108
108
  "--max-num-levels",
109
- type=click.Choice([1, 2]),
109
+ type=click.Choice(["1", "2"]),
110
110
  help="Maximum number of levels",
111
111
  default=1
112
112
  )
@@ -204,6 +204,9 @@ class VectorDB(ABC):
204
204
  """
205
205
  raise NotImplementedError
206
206
 
207
def optimize_with_size(self, data_size: int):
    """Default size-aware optimize hook: ignores ``data_size`` and calls ``optimize()``."""
    self.optimize()
209
+
207
210
  # TODO: remove
208
211
  @abstractmethod
209
212
  def ready_to_load(self):