vectordb-bench 1.0.5__tar.gz → 1.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/PKG-INFO +40 -8
  2. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/README.md +39 -7
  3. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/__init__.py +1 -0
  4. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/__init__.py +15 -0
  5. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/api.py +2 -0
  6. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +104 -40
  7. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/aws_opensearch/cli.py +52 -15
  8. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/aws_opensearch/config.py +27 -7
  9. vectordb_bench-1.0.7/vectordb_bench/backend/clients/hologres/cli.py +50 -0
  10. vectordb_bench-1.0.7/vectordb_bench/backend/clients/hologres/config.py +121 -0
  11. vectordb_bench-1.0.7/vectordb_bench/backend/clients/hologres/hologres.py +365 -0
  12. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/lancedb/lancedb.py +1 -0
  13. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/milvus/cli.py +25 -0
  14. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/milvus/config.py +2 -0
  15. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/milvus/milvus.py +1 -1
  16. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/oceanbase/cli.py +1 -0
  17. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/oceanbase/config.py +3 -1
  18. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/oceanbase/oceanbase.py +20 -4
  19. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgdiskann/cli.py +45 -0
  20. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgdiskann/config.py +16 -0
  21. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +94 -26
  22. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -1
  23. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/zilliz_cloud/config.py +4 -1
  24. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/runner/rate_runner.py +23 -11
  25. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/cli/cli.py +36 -0
  26. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/cli/vectordbbench.py +2 -0
  27. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/config/dbCaseConfigs.py +82 -3
  28. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/config/styles.py +1 -0
  29. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/interface.py +5 -1
  30. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/models.py +3 -0
  31. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/getLeaderboardDataV2.py +23 -2
  32. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/leaderboard_v2.json +200 -0
  33. vectordb_bench-1.0.7/vectordb_bench/results/leaderboard_v2_streaming.json +128 -0
  34. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench.egg-info/PKG-INFO +40 -8
  35. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench.egg-info/SOURCES.txt +4 -0
  36. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/.devcontainer/Dockerfile +0 -0
  37. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/.devcontainer/devcontainer.json +0 -0
  38. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/.env.example +0 -0
  39. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/.github/workflows/publish_package_on_release.yml +0 -0
  40. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/.github/workflows/pull_request.yml +0 -0
  41. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/.gitignore +0 -0
  42. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/Dockerfile +0 -0
  43. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/LICENSE +0 -0
  44. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/Makefile +0 -0
  45. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/OWNERS +0 -0
  46. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/install/requirements_py3.11.txt +0 -0
  47. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/install.py +0 -0
  48. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/pyproject.toml +0 -0
  49. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/setup.cfg +0 -0
  50. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/conftest.py +0 -0
  51. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/pytest.ini +0 -0
  52. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/test_bench_runner.py +0 -0
  53. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/test_chroma.py +0 -0
  54. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/test_data_source.py +0 -0
  55. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/test_dataset.py +0 -0
  56. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/test_elasticsearch_cloud.py +0 -0
  57. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/test_models.py +0 -0
  58. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/test_rate_runner.py +0 -0
  59. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/test_redis.py +0 -0
  60. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/test_utils.py +0 -0
  61. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/tests/ut_cases.py +0 -0
  62. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/__main__.py +0 -0
  63. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/__init__.py +0 -0
  64. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/assembler.py +0 -0
  65. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/cases.py +0 -0
  66. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +0 -0
  67. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +0 -0
  68. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +0 -0
  69. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/aliyun_opensearch/config.py +0 -0
  70. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/alloydb/alloydb.py +0 -0
  71. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/alloydb/cli.py +0 -0
  72. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/alloydb/config.py +0 -0
  73. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -0
  74. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/chroma/chroma.py +0 -0
  75. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  76. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/clickhouse/cli.py +0 -0
  77. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/clickhouse/clickhouse.py +0 -0
  78. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/clickhouse/config.py +0 -0
  79. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
  80. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
  81. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/lancedb/cli.py +0 -0
  82. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/lancedb/config.py +0 -0
  83. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/mariadb/cli.py +0 -0
  84. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/mariadb/config.py +0 -0
  85. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/mariadb/mariadb.py +0 -0
  86. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/memorydb/cli.py +0 -0
  87. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
  88. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/memorydb/memorydb.py +0 -0
  89. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/mongodb/config.py +0 -0
  90. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/mongodb/mongodb.py +0 -0
  91. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/oss_opensearch/cli.py +0 -0
  92. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/oss_opensearch/config.py +0 -0
  93. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py +0 -0
  94. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/oss_opensearch/run.py +0 -0
  95. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
  96. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
  97. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
  98. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgvector/cli.py +0 -0
  99. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgvector/config.py +0 -0
  100. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -0
  101. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgvectorscale/cli.py +0 -0
  102. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
  103. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +0 -0
  104. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
  105. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
  106. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/qdrant_cloud/cli.py +0 -0
  107. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -0
  108. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +0 -0
  109. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/qdrant_local/cli.py +0 -0
  110. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/qdrant_local/config.py +0 -0
  111. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +0 -0
  112. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/redis/cli.py +0 -0
  113. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/redis/config.py +0 -0
  114. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/redis/redis.py +0 -0
  115. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/s3_vectors/config.py +0 -0
  116. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/s3_vectors/s3_vectors.py +0 -0
  117. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/test/cli.py +0 -0
  118. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/test/config.py +0 -0
  119. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/test/test.py +0 -0
  120. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/tidb/cli.py +0 -0
  121. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/tidb/config.py +0 -0
  122. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/tidb/tidb.py +0 -0
  123. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/vespa/cli.py +0 -0
  124. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/vespa/config.py +0 -0
  125. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/vespa/util.py +0 -0
  126. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/vespa/vespa.py +0 -0
  127. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -0
  128. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
  129. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
  130. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  131. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/data_source.py +0 -0
  132. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/dataset.py +0 -0
  133. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/filter.py +0 -0
  134. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/result_collector.py +0 -0
  135. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/runner/__init__.py +0 -0
  136. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/runner/mp_runner.py +0 -0
  137. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/runner/read_write_runner.py +0 -0
  138. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/runner/serial_runner.py +0 -0
  139. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/runner/util.py +0 -0
  140. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/task_runner.py +0 -0
  141. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/backend/utils.py +0 -0
  142. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/base.py +0 -0
  143. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/cli/__init__.py +0 -0
  144. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/cli/batch_cli.py +0 -0
  145. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/config-files/batch_sample_config.yml +0 -0
  146. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/config-files/sample_config.yml +0 -0
  147. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/custom/custom_case.json +0 -0
  148. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/custom_case_run_test.png +0 -0
  149. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/custom_dataset.png +0 -0
  150. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/homepage/bar-chart.png +0 -0
  151. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/homepage/concurrent.png +0 -0
  152. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/homepage/custom.png +0 -0
  153. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/homepage/label_filter.png +0 -0
  154. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/homepage/qp$.png +0 -0
  155. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/homepage/run_test.png +0 -0
  156. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/homepage/streaming.png +0 -0
  157. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/homepage/table.png +0 -0
  158. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/run_test_select_case.png +0 -0
  159. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/run_test_select_db.png +0 -0
  160. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/fig/run_test_submit.png +0 -0
  161. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
  162. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/check_results/data.py +0 -0
  163. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
  164. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
  165. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  166. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
  167. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
  168. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
  169. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
  170. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/concurrent/charts.py +0 -0
  171. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -0
  172. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/custom/displaypPrams.py +0 -0
  173. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/custom/getCustomConfig.py +0 -0
  174. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
  175. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  176. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/int_filter/charts.py +0 -0
  177. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/label_filter/charts.py +0 -0
  178. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
  179. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -0
  180. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
  181. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
  182. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
  183. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  184. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
  185. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/run_test/inputWidget.py +0 -0
  186. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/run_test/submitTask.py +0 -0
  187. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/streaming/charts.py +0 -0
  188. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/streaming/data.py +0 -0
  189. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/tables/data.py +0 -0
  190. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/welcome/explainPrams.py +0 -0
  191. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/welcome/pagestyle.py +0 -0
  192. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/components/welcome/welcomePrams.py +0 -0
  193. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/config/dbPrices.py +0 -0
  194. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/pages/concurrent.py +0 -0
  195. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/pages/custom.py +0 -0
  196. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/pages/int_filter.py +0 -0
  197. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/pages/label_filter.py +0 -0
  198. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/pages/quries_per_dollar.py +0 -0
  199. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/pages/results.py +0 -0
  200. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/pages/run_test.py +0 -0
  201. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/pages/streaming.py +0 -0
  202. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/pages/tables.py +0 -0
  203. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/utils.py +0 -0
  204. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/frontend/vdbbench.py +0 -0
  205. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/log_util.py +0 -0
  206. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/metric.py +0 -0
  207. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  208. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  209. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +0 -0
  210. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
  211. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
  212. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +0 -0
  213. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +0 -0
  214. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
  215. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
  216. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  217. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  218. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +0 -0
  219. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  220. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  221. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +0 -0
  222. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/S3Vectors/result_20250722_standard_s3vectors.json +0 -0
  223. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  224. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  225. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +0 -0
  226. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/dbPrices.json +0 -0
  227. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/getLeaderboardData.py +0 -0
  228. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench/results/leaderboard.json +0 -0
  229. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  230. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench.egg-info/entry_points.txt +0 -0
  231. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench.egg-info/requires.txt +0 -0
  232. {vectordb_bench-1.0.5 → vectordb_bench-1.0.7}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vectordb-bench
3
- Version: 1.0.5
3
+ Version: 1.0.7
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -153,6 +153,7 @@ All the database client supported
153
153
  | tidb | `pip install vectordb-bench[tidb]` |
154
154
  | vespa | `pip install vectordb-bench[vespa]` |
155
155
  | oceanbase | `pip install vectordb-bench[oceanbase]` |
156
+ | hologres | `pip install vectordb-bench[hologres]` |
156
157
 
157
158
  ### Run
158
159
 
@@ -269,7 +270,7 @@ vectordbbench awsopensearch --db-label awsopensearch \
269
270
  --m 16 --ef-construction 256 \
270
271
  --host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
271
272
  --user vector --password '<password>' \
272
- --case-type Performance1536D5M --num-insert-workers 10 \
273
+ --case-type Performance1536D5M --number-of-indexing-clients 10 \
273
274
  --skip-load --num-concurrency 75
274
275
  ```
275
276
 
@@ -297,7 +298,7 @@ Options:
297
298
  --force-merge-enabled BOOLEAN Whether to perform force merge operation
298
299
  --flush-threshold-size TEXT Size threshold for flushing the transaction
299
300
  log
300
- --engine TEXT type of engine to use valid values [faiss, lucene]
301
+ --engine TEXT type of engine to use valid values [faiss, lucene, s3vector]
301
302
  # Memory Management
302
303
  --cb-threshold TEXT k-NN Memory circuit breaker threshold
303
304
 
@@ -372,6 +373,37 @@ Options:
372
373
  --help Show this message and exit.
373
374
  ```
374
375
 
376
+ ### Run Hologres from command line
377
+
378
+ Execute tests for the index types: HGraph.
379
+
380
+ ```shell
381
+ vectordbbench hologreshgraph --host xxx --port xxx --user ACCESS_ID --password ACCESS_KEY --database test \
382
+ --m 64 --ef-construction 400 --case-type Performance768D1M \
383
+ --index-type HGraph --ef-search 51 --k 10
384
+ ```
385
+
386
+ To list the options for Hologres, execute `vectordbbench hologreshgraph --help`. The following are some Hologres-specific command-line options.
387
+
388
+ ```text
389
+ $ vectordbbench hologreshgraph --help
390
+ Usage: vectordbbench hologreshgraph [OPTIONS]
391
+
392
+ Options:
393
+ [...]
394
+ --host TEXT Hologres host
395
+ --user TEXT Hologres username [required]
396
+ --password TEXT Hologres database password
397
+ --database TEXT Hologres database name [required]
398
+ --port INTEGER Hologres port [required]
399
+ --m INTEGER hnsw m [required]
400
+ --ef-construction INTEGER hnsw ef-construction [required]
401
+ --ef-search INTEGER hnsw ef-search [required]
402
+ --index-type [HGraph] Type of index to use. Supported values:
403
+ HGraph [required]
404
+ --help Show this message and exit.
405
+ ```
406
+
375
407
  #### Using a configuration file.
376
408
 
377
409
  The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
@@ -525,9 +557,9 @@ All standard benchmark results are generated by a client running on an 8 core, 3
525
557
  2. The next step is to select the test cases you want to perform. You can select multiple cases at once, and a form to collect corresponding parameters will appear.
526
558
  3. Finally, you'll need to provide a task label to distinguish different test results. Using the same label for different tests will result in the previous results being overwritten.
527
559
  Currently, only one task can be run at a time.
528
- ![image](fig/run_test_select_db.png)
529
- ![image](fig/run_test_select_case.png)
530
- ![image](fig/run_test_submit.png)
560
+ ![image](vectordb_bench/fig/run_test_select_db.png)
561
+ ![image](vectordb_bench/fig/run_test_select_case.png)
562
+ ![image](vectordb_bench/fig/run_test_submit.png)
531
563
 
532
564
 
533
565
  ## Module
@@ -557,8 +589,8 @@ Each case provides an in-depth examination of a vector database's abilities, pro
557
589
 
558
590
  Through the `/custom` page, users can customize their own performance case using local datasets. After saving, the corresponding case can be selected from the `/run_test` page to perform the test.
559
591
 
560
- ![image](fig/custom_dataset.png)
561
- ![image](fig/custom_case_run_test.png)
592
+ ![image](vectordb_bench/fig/custom_dataset.png)
593
+ ![image](vectordb_bench/fig/custom_case_run_test.png)
562
594
 
563
595
  We have strict requirements for the data set format, please follow them.
564
596
  - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
@@ -58,6 +58,7 @@ All the database client supported
58
58
  | tidb | `pip install vectordb-bench[tidb]` |
59
59
  | vespa | `pip install vectordb-bench[vespa]` |
60
60
  | oceanbase | `pip install vectordb-bench[oceanbase]` |
61
+ | hologres | `pip install vectordb-bench[hologres]` |
61
62
 
62
63
  ### Run
63
64
 
@@ -174,7 +175,7 @@ vectordbbench awsopensearch --db-label awsopensearch \
174
175
  --m 16 --ef-construction 256 \
175
176
  --host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
176
177
  --user vector --password '<password>' \
177
- --case-type Performance1536D5M --num-insert-workers 10 \
178
+ --case-type Performance1536D5M --number-of-indexing-clients 10 \
178
179
  --skip-load --num-concurrency 75
179
180
  ```
180
181
 
@@ -202,7 +203,7 @@ Options:
202
203
  --force-merge-enabled BOOLEAN Whether to perform force merge operation
203
204
  --flush-threshold-size TEXT Size threshold for flushing the transaction
204
205
  log
205
- --engine TEXT type of engine to use valid values [faiss, lucene]
206
+ --engine TEXT type of engine to use valid values [faiss, lucene, s3vector]
206
207
  # Memory Management
207
208
  --cb-threshold TEXT k-NN Memory circuit breaker threshold
208
209
 
@@ -277,6 +278,37 @@ Options:
277
278
  --help Show this message and exit.
278
279
  ```
279
280
 
281
+ ### Run Hologres from command line
282
+
283
+ Execute tests for the index types: HGraph.
284
+
285
+ ```shell
286
+ vectordbbench hologreshgraph --host xxx --port xxx --user ACCESS_ID --password ACCESS_KEY --database test \
287
+ --m 64 --ef-construction 400 --case-type Performance768D1M \
288
+ --index-type HGraph --ef-search 51 --k 10
289
+ ```
290
+
291
+ To list the options for Hologres, execute `vectordbbench hologreshgraph --help`. The following are some Hologres-specific command-line options.
292
+
293
+ ```text
294
+ $ vectordbbench hologreshgraph --help
295
+ Usage: vectordbbench hologreshgraph [OPTIONS]
296
+
297
+ Options:
298
+ [...]
299
+ --host TEXT Hologres host
300
+ --user TEXT Hologres username [required]
301
+ --password TEXT Hologres database password
302
+ --database TEXT Hologres database name [required]
303
+ --port INTEGER Hologres port [required]
304
+ --m INTEGER hnsw m [required]
305
+ --ef-construction INTEGER hnsw ef-construction [required]
306
+ --ef-search INTEGER hnsw ef-search [required]
307
+ --index-type [HGraph] Type of index to use. Supported values:
308
+ HGraph [required]
309
+ --help Show this message and exit.
310
+ ```
311
+
280
312
  #### Using a configuration file.
281
313
 
282
314
  The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
@@ -430,9 +462,9 @@ All standard benchmark results are generated by a client running on an 8 core, 3
430
462
  2. The next step is to select the test cases you want to perform. You can select multiple cases at once, and a form to collect corresponding parameters will appear.
431
463
  3. Finally, you'll need to provide a task label to distinguish different test results. Using the same label for different tests will result in the previous results being overwritten.
432
464
  Currently, only one task can run at a time.
433
- ![image](fig/run_test_select_db.png)
434
- ![image](fig/run_test_select_case.png)
435
- ![image](fig/run_test_submit.png)
465
+ ![image](vectordb_bench/fig/run_test_select_db.png)
466
+ ![image](vectordb_bench/fig/run_test_select_case.png)
467
+ ![image](vectordb_bench/fig/run_test_submit.png)
436
468
 
437
469
 
438
470
  ## Module
@@ -462,8 +494,8 @@ Each case provides an in-depth examination of a vector database's abilities, pro
462
494
 
463
495
  Through the `/custom` page, users can customize their own performance case using local datasets. After saving, the corresponding case can be selected from the `/run_test` page to perform the test.
464
496
 
465
- ![image](fig/custom_dataset.png)
466
- ![image](fig/custom_case_run_test.png)
497
+ ![image](vectordb_bench/fig/custom_dataset.png)
498
+ ![image](vectordb_bench/fig/custom_case_run_test.png)
467
499
 
468
500
  We have strict requirements for the data set format; please follow them.
469
501
  - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
@@ -16,6 +16,7 @@ class config:
16
16
  LOG_LEVEL = env.str("LOG_LEVEL", "INFO")
17
17
 
18
18
  DEFAULT_DATASET_URL = env.str("DEFAULT_DATASET_URL", AWS_S3_URL)
19
+ DATASET_SOURCE = env.str("DATASET_SOURCE", "S3") # Options "S3" or "AliyunOSS"
19
20
  DATASET_LOCAL_DIR = env.path("DATASET_LOCAL_DIR", "/tmp/vectordb_bench/dataset")
20
21
  NUM_PER_BATCH = env.int("NUM_PER_BATCH", 100)
21
22
  TIME_PER_BATCH = 1 # 1s. for streaming insertion.
@@ -50,6 +50,7 @@ class DB(Enum):
50
50
  LanceDB = "LanceDB"
51
51
  OceanBase = "OceanBase"
52
52
  S3Vectors = "S3Vectors"
53
+ Hologres = "Alibaba Cloud Hologres"
53
54
 
54
55
  @property
55
56
  def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
@@ -194,6 +195,11 @@ class DB(Enum):
194
195
 
195
196
  return S3Vectors
196
197
 
198
+ if self == DB.Hologres:
199
+ from .hologres.hologres import Hologres
200
+
201
+ return Hologres
202
+
197
203
  msg = f"Unknown DB: {self.name}"
198
204
  raise ValueError(msg)
199
205
 
@@ -340,6 +346,11 @@ class DB(Enum):
340
346
 
341
347
  return S3VectorsConfig
342
348
 
349
+ if self == DB.Hologres:
350
+ from .hologres.config import HologresConfig
351
+
352
+ return HologresConfig
353
+
343
354
  msg = f"Unknown DB: {self.name}"
344
355
  raise ValueError(msg)
345
356
 
@@ -461,6 +472,10 @@ class DB(Enum):
461
472
  from .s3_vectors.config import S3VectorsIndexConfig
462
473
 
463
474
  return S3VectorsIndexConfig
475
+ if self == DB.Hologres:
476
+ from .hologres.config import HologresIndexConfig
477
+
478
+ return HologresIndexConfig
464
479
 
465
480
  # DB.Pinecone, DB.Chroma, DB.Redis
466
481
  return EmptyDBCaseConfig
@@ -40,6 +40,8 @@ class IndexType(str, Enum):
40
40
  GPU_IVF_PQ = "GPU_IVF_PQ"
41
41
  GPU_CAGRA = "GPU_CAGRA"
42
42
  SCANN = "scann"
43
+ Hologres_HGraph = "HGraph"
44
+ Hologres_Graph = "Graph"
43
45
  NONE = "NONE"
44
46
 
45
47
 
@@ -65,9 +65,7 @@ class AWSOpenSearch(VectorDB):
65
65
  self._load_graphs_to_memory(client)
66
66
 
67
67
  def _create_index(self, client: OpenSearch) -> None:
68
- ef_search_value = (
69
- self.case_config.ef_search if self.case_config.ef_search is not None else self.case_config.efSearch
70
- )
68
+ ef_search_value = self.case_config.ef_search
71
69
  log.info(f"Creating index with ef_search: {ef_search_value}")
72
70
  log.info(f"Creating index with number_of_replicas: {self.case_config.number_of_replicas}")
73
71
 
@@ -81,7 +79,7 @@ class AWSOpenSearch(VectorDB):
81
79
  "knn.memory.circuit_breaker.limit": self.case_config.cb_threshold,
82
80
  }
83
81
  }
84
- client.cluster.put_settings(cluster_settings_body)
82
+ client.cluster.put_settings(body=cluster_settings_body)
85
83
  settings = {
86
84
  "index": {
87
85
  "knn": True,
@@ -93,25 +91,83 @@ class AWSOpenSearch(VectorDB):
93
91
  "refresh_interval": self.case_config.refresh_interval,
94
92
  }
95
93
  settings["index"]["knn.algo_param.ef_search"] = ef_search_value
96
- mappings = {
97
- "_source": {"excludes": [self.vector_col_name], "recovery_source_excludes": [self.vector_col_name]},
98
- "properties": {
99
- self.id_col_name: {"type": "integer", "store": True},
100
- self.label_col_name: {"type": "keyword"},
101
- self.vector_col_name: {
102
- "type": "knn_vector",
103
- "dimension": self.dim,
104
- "method": self.case_config.index_param(),
105
- },
106
- },
94
+
95
+ # Get method configuration and log it for debugging
96
+ method_config = self.case_config.index_param()
97
+ log.info(f"Raw method config from index_param(): {method_config}")
98
+
99
+ # For s3vector engine, ensure method only contains engine field
100
+ if self.case_config.engine == AWSOS_Engine.s3vector:
101
+ method_config = {"engine": "s3vector"}
102
+ log.info(f"Cleaned method config for s3vector: {method_config}")
103
+
104
+ # Prepare vector field configuration
105
+ vector_field_config = {
106
+ "type": "knn_vector",
107
+ "store": True,
108
+ "dimension": self.dim,
109
+ "method": method_config,
107
110
  }
111
+
112
+ # For s3vector engine, space_type should be set at the vector field level
113
+ if self.case_config.engine == AWSOS_Engine.s3vector:
114
+ space_type = self.case_config.parse_metric()
115
+ vector_field_config["space_type"] = space_type
116
+
117
+ # Ensure method config is absolutely clean for s3vector - remove any potential extra fields
118
+ vector_field_config["method"] = {"engine": "s3vector"}
119
+
120
+ log.info(f"Setting space_type '{space_type}' at vector field level for s3vector engine")
121
+ log.info(f"Final vector field config for s3vector: {vector_field_config}")
122
+
123
+ # Configure mappings based on engine type
124
+ if self.case_config.engine == AWSOS_Engine.s3vector:
125
+ # For s3vector engine, use simplified mappings without _source configuration
126
+ mappings = {
127
+ "properties": {
128
+ # self.id_col_name: {"type": "integer", "store": True},
129
+ self.label_col_name: {"type": "keyword"},
130
+ self.vector_col_name: vector_field_config,
131
+ },
132
+ }
133
+ log.info("Using simplified mappings for s3vector engine (no _source configuration)")
134
+ else:
135
+ # For other engines (faiss, lucene), use standard mappings with _source configuration
136
+ mappings = {
137
+ "_source": {"excludes": [self.vector_col_name], "recovery_source_excludes": [self.vector_col_name]},
138
+ "properties": {
139
+ # self.id_col_name: {"type": "integer", "store": True},
140
+ self.label_col_name: {"type": "keyword"},
141
+ self.vector_col_name: vector_field_config,
142
+ },
143
+ }
144
+ log.info("Using standard mappings with _source configuration for non-s3vector engines")
108
145
  try:
109
146
  log.info(f"Creating index with settings: {settings}")
110
147
  log.info(f"Creating index with mappings: {mappings}")
148
+
149
+ # Additional logging for s3vector to confirm method config before sending
150
+ if self.case_config.engine == AWSOS_Engine.s3vector:
151
+ method_in_mappings = mappings["properties"][self.vector_col_name]["method"]
152
+ log.info(f"Final method config being sent to OpenSearch: {method_in_mappings}")
153
+
111
154
  client.indices.create(
112
155
  index=self.index_name,
113
156
  body={"settings": settings, "mappings": mappings},
114
157
  )
158
+
159
+ # For s3vector, verify the actual index configuration after creation
160
+ if self.case_config.engine == AWSOS_Engine.s3vector:
161
+ try:
162
+ actual_mapping = client.indices.get_mapping(index=self.index_name)
163
+ actual_method = actual_mapping[self.index_name]["mappings"]["properties"][self.vector_col_name][
164
+ "method"
165
+ ]
166
+ log.info(f"Actual method config in created index: {actual_method}")
167
+
168
+ except Exception as e:
169
+ log.warning(f"Failed to verify index configuration: {e}")
170
+
115
171
  except Exception as e:
116
172
  log.warning(f"Failed to create index: {self.index_name} error: {e!s}")
117
173
  raise e from None
@@ -153,12 +209,12 @@ class AWSOpenSearch(VectorDB):
153
209
  insert_data = []
154
210
  for i in range(len(embeddings)):
155
211
  index_data = {"index": {"_index": self.index_name, self.id_col_name: metadata[i]}}
156
- if self.with_scalar_labels and self.case_config.use_routing:
212
+ if self.with_scalar_labels and self.case_config.use_routing and labels_data is not None:
157
213
  index_data["routing"] = labels_data[i]
158
214
  insert_data.append(index_data)
159
215
 
160
216
  other_data = {self.vector_col_name: embeddings[i]}
161
- if self.with_scalar_labels:
217
+ if self.with_scalar_labels and labels_data is not None:
162
218
  other_data[self.label_col_name] = labels_data[i]
163
219
  insert_data.append(other_data)
164
220
 
@@ -168,7 +224,7 @@ class AWSOpenSearch(VectorDB):
168
224
  except Exception as e:
169
225
  log.warning(f"Failed to insert data: {self.index_name} error: {e!s}")
170
226
  time.sleep(10)
171
- return self._insert_with_single_client(embeddings, metadata)
227
+ return self._insert_with_single_client(embeddings, metadata, labels_data)
172
228
 
173
229
  def _insert_with_multiple_clients(
174
230
  self,
@@ -186,7 +242,8 @@ class AWSOpenSearch(VectorDB):
186
242
 
187
243
  for i in range(0, len(embeddings_list), chunk_size):
188
244
  end = min(i + chunk_size, len(embeddings_list))
189
- chunks.append((embeddings_list[i:end], metadata[i:end], labels_data[i:end]))
245
+ chunk_labels = labels_data[i:end] if labels_data is not None else None
246
+ chunks.append((embeddings_list[i:end], metadata[i:end], chunk_labels))
190
247
 
191
248
  clients = []
192
249
  for _ in range(min(num_clients, len(chunks))):
@@ -202,12 +259,12 @@ class AWSOpenSearch(VectorDB):
202
259
  insert_data = []
203
260
  for i in range(len(chunk_embeddings)):
204
261
  index_data = {"index": {"_index": self.index_name, self.id_col_name: chunk_metadata[i]}}
205
- if self.with_scalar_labels and self.case_config.use_routing:
262
+ if self.with_scalar_labels and self.case_config.use_routing and chunk_labels_data is not None:
206
263
  index_data["routing"] = chunk_labels_data[i]
207
264
  insert_data.append(index_data)
208
265
 
209
266
  other_data = {self.vector_col_name: chunk_embeddings[i]}
210
- if self.with_scalar_labels:
267
+ if self.with_scalar_labels and chunk_labels_data is not None:
211
268
  other_data[self.label_col_name] = chunk_labels_data[i]
212
269
  insert_data.append(other_data)
213
270
 
@@ -254,10 +311,7 @@ class AWSOpenSearch(VectorDB):
254
311
  return (total_count, None)
255
312
 
256
313
  def _update_ef_search_before_search(self, client: OpenSearch):
257
- ef_search_value = (
258
- self.case_config.ef_search if self.case_config.ef_search is not None else self.case_config.efSearch
259
- )
260
-
314
+ ef_search_value = self.case_config.ef_search
261
315
  try:
262
316
  index_settings = client.indices.get_settings(index=self.index_name)
263
317
  current_ef_search = (
@@ -297,23 +351,33 @@ class AWSOpenSearch(VectorDB):
297
351
  """
298
352
  assert self.client is not None, "should self.init() first"
299
353
 
354
+ # Configure query based on engine type
355
+ if self.case_config.engine == AWSOS_Engine.s3vector:
356
+ # For s3vector engine, use simplified query without method_parameters
357
+ knn_query = {
358
+ "vector": query,
359
+ "k": k,
360
+ **({"filter": self.filter} if self.filter else {}),
361
+ }
362
+ log.debug("Using simplified knn query for s3vector engine (no method_parameters)")
363
+ else:
364
+ # For other engines (faiss, lucene), use standard query with method_parameters
365
+ knn_query = {
366
+ "vector": query,
367
+ "k": k,
368
+ "method_parameters": self.case_config.search_param(),
369
+ **({"filter": self.filter} if self.filter else {}),
370
+ **(
371
+ {"rescore": {"oversample_factor": self.case_config.oversample_factor}}
372
+ if self.case_config.use_quant
373
+ else {}
374
+ ),
375
+ }
376
+ log.debug("Using standard knn query with method_parameters for non-s3vector engines")
377
+
300
378
  body = {
301
379
  "size": k,
302
- "query": {
303
- "knn": {
304
- self.vector_col_name: {
305
- "vector": query,
306
- "k": k,
307
- "method_parameters": self.case_config.search_param(),
308
- **({"filter": self.filter} if self.filter else {}),
309
- **(
310
- {"rescore": {"oversample_factor": self.case_config.oversample_factor}}
311
- if self.case_config.use_quant
312
- else {}
313
- ),
314
- }
315
- }
316
- },
380
+ "query": {"knn": {self.vector_col_name: knn_query}},
317
381
  }
318
382
 
319
383
  try:
@@ -46,7 +46,7 @@ class AWSOpenSearchTypedDict(TypedDict):
46
46
  str,
47
47
  click.option(
48
48
  "--engine",
49
- type=click.Choice(["nmslib", "faiss", "lucene"], case_sensitive=False),
49
+ type=click.Choice(["faiss", "lucene", "s3vector"], case_sensitive=False),
50
50
  help="HNSW algorithm implementation to use",
51
51
  default="faiss",
52
52
  ),
@@ -96,24 +96,44 @@ class AWSOpenSearchTypedDict(TypedDict):
96
96
  ),
97
97
  ]
98
98
 
99
- quantization_type: Annotated[
100
- str | None,
99
+ index_thread_qty_during_force_merge: Annotated[
100
+ int,
101
101
  click.option(
102
- "--quantization-type",
103
- type=click.Choice(["fp32", "fp16"]),
104
- help="quantization type for vectors (in index)",
105
- default="fp32",
102
+ "--index-thread-qty-during-force-merge",
103
+ type=int,
104
+ help="Thread count during force merge operations",
105
+ default=8,
106
+ ),
107
+ ]
108
+
109
+ number_of_indexing_clients: Annotated[
110
+ int,
111
+ click.option(
112
+ "--number-of-indexing-clients",
113
+ type=int,
114
+ help="Number of concurrent indexing clients",
115
+ default=1,
116
+ ),
117
+ ]
118
+
119
+ ef_construction: Annotated[
120
+ int | None,
121
+ click.option(
122
+ "--ef-construction",
123
+ type=int,
124
+ help="ef parameter for HNSW construction (not used for s3vector engine)",
125
+ default=None,
106
126
  required=False,
107
127
  ),
108
128
  ]
109
129
 
110
- engine: Annotated[
130
+ quantization_type: Annotated[
111
131
  str | None,
112
132
  click.option(
113
- "--engine",
114
- type=click.Choice(["faiss", "lucene"]),
133
+ "--quantization-type",
134
+ type=click.Choice(["fp32", "fp16"]),
115
135
  help="quantization type for vectors (in index)",
116
- default="faiss",
136
+ default="fp32",
117
137
  required=False,
118
138
  ),
119
139
  ]
@@ -127,6 +147,21 @@ class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFl
127
147
  def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
128
148
  from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig
129
149
 
150
+ # Set default values for HNSW parameters if not provided and not using s3vector
151
+ engine = AWSOS_Engine(parameters["engine"])
152
+ ef_construction = parameters.get("ef_construction")
153
+ ef_search = parameters.get("ef_search")
154
+ m = parameters.get("m")
155
+
156
+ # For non-s3vector engines, provide defaults if None
157
+ if engine != AWSOS_Engine.s3vector:
158
+ if ef_construction is None:
159
+ ef_construction = 200
160
+ if ef_search is None:
161
+ ef_search = 100
162
+ if m is None:
163
+ m = 16
164
+
130
165
  run(
131
166
  db=DB.AWSOpenSearch,
132
167
  db_config=AWSOpenSearchConfig(
@@ -144,12 +179,14 @@ def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
144
179
  force_merge_enabled=parameters["force_merge_enabled"],
145
180
  flush_threshold_size=parameters["flush_threshold_size"],
146
181
  index_thread_qty_during_force_merge=parameters["index_thread_qty_during_force_merge"],
182
+ number_of_indexing_clients=parameters["number_of_indexing_clients"],
147
183
  cb_threshold=parameters["cb_threshold"],
148
- efConstruction=parameters["ef_construction"],
149
- efSearch=parameters["ef_runtime"],
150
- M=parameters["m"],
151
- engine=AWSOS_Engine(parameters["engine"]),
184
+ efConstruction=ef_construction,
185
+ ef_search=ef_search,
186
+ M=m,
187
+ engine=engine,
152
188
  quantization_type=AWSOSQuantization(parameters["quantization_type"]),
189
+ metric_type_name=parameters["metric_type"],
153
190
  ),
154
191
  **parameters,
155
192
  )
@@ -34,6 +34,7 @@ class AWSOpenSearchConfig(DBConfig, BaseModel):
34
34
  class AWSOS_Engine(Enum):
35
35
  faiss = "faiss"
36
36
  lucene = "lucene"
37
+ s3vector = "s3vector"
37
38
 
38
39
 
39
40
  class AWSOSQuantization(Enum):
@@ -44,11 +45,11 @@ class AWSOSQuantization(Enum):
44
45
  class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
45
46
  metric_type: MetricType = MetricType.L2
46
47
  engine: AWSOS_Engine = AWSOS_Engine.faiss
47
- efConstruction: int = 256
48
- efSearch: int = 100
48
+ efConstruction: int | None = 256
49
+ ef_search: int | None = 100
49
50
  engine_name: str | None = None
50
51
  metric_type_name: str | None = None
51
- M: int = 16
52
+ M: int | None = 16
52
53
  index_thread_qty: int | None = 4
53
54
  number_of_shards: int | None = 1
54
55
  number_of_replicas: int | None = 0
@@ -91,6 +92,13 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
91
92
 
92
93
  def parse_metric(self) -> str:
93
94
  log.info(f"User specified metric_type: {self.metric_type_name}")
95
+
96
+ # Handle None or empty metric_type_name
97
+ if self.metric_type_name is None or self.metric_type_name == "":
98
+ log.info("No metric_type_name specified, defaulting to l2")
99
+ self.metric_type = MetricType.L2
100
+ return "l2"
101
+
94
102
  self.metric_type = MetricType[self.metric_type_name.upper()]
95
103
  if self.metric_type == MetricType.IP:
96
104
  return "innerproduct"
@@ -108,20 +116,28 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
108
116
  def index_param(self) -> dict:
109
117
  log.info(f"Using engine: {self.engine} for index creation")
110
118
  log.info(f"Using metric_type: {self.metric_type_name} for index creation")
111
- log.info(f"Resulting space_type: {self.parse_metric()} for index creation")
119
+ space_type = self.parse_metric()
120
+ log.info(f"Resulting space_type: {space_type} for index creation")
121
+
122
+ # Handle s3vector engine with simplified configuration
123
+ # For s3vector, space_type should be set at the vector field level, not in method
124
+ if self.engine == AWSOS_Engine.s3vector:
125
+ return {"engine": "s3vector"}
112
126
 
113
127
  parameters = {"ef_construction": self.efConstruction, "m": self.M}
114
128
 
115
- if self.engine == AWSOS_Engine.faiss and self.faiss_use_fp16:
129
+ if self.engine == AWSOS_Engine.faiss and self.quantization_type == AWSOSQuantization.fp16:
116
130
  parameters["encoder"] = {"name": "sq", "parameters": {"type": "fp16"}}
117
131
 
132
+ # For other engines (faiss, lucene), space_type is set at method level
118
133
  return {
119
134
  "name": "hnsw",
120
135
  "engine": self.engine.value,
136
+ "space_type": space_type,
121
137
  "parameters": {
122
138
  "ef_construction": self.efConstruction,
123
139
  "m": self.M,
124
- "ef_search": self.efSearch,
140
+ "ef_search": self.ef_search,
125
141
  **(
126
142
  {"encoder": {"name": "sq", "parameters": {"type": self.quantization_type.fp16.value}}}
127
143
  if self.use_quant
@@ -131,4 +147,8 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
131
147
  }
132
148
 
133
149
  def search_param(self) -> dict:
134
- return {"ef_search": self.efSearch}
150
+ # s3vector engine doesn't use ef_search parameter
151
+ if self.engine == AWSOS_Engine.s3vector:
152
+ return {}
153
+
154
+ return {"ef_search": self.ef_search}