vectordb-bench 0.0.13__tar.gz → 0.0.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/PKG-INFO +33 -3
  2. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/README.md +26 -0
  3. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/pyproject.toml +3 -2
  4. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/__init__.py +14 -13
  5. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/__init__.py +13 -0
  6. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/api.py +2 -0
  7. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +47 -6
  8. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/aws_opensearch/config.py +12 -6
  9. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/aws_opensearch/run.py +34 -3
  10. vectordb_bench-0.0.15/vectordb_bench/backend/clients/pgdiskann/cli.py +99 -0
  11. vectordb_bench-0.0.15/vectordb_bench/backend/clients/pgdiskann/config.py +145 -0
  12. vectordb_bench-0.0.13/vectordb_bench/backend/clients/pgvector/pgvector.py → vectordb_bench-0.0.15/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +48 -48
  13. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/pgvector/cli.py +62 -1
  14. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/pgvector/config.py +48 -10
  15. vectordb_bench-0.0.15/vectordb_bench/backend/clients/pgvector/pgvector.py +469 -0
  16. vectordb_bench-0.0.15/vectordb_bench/backend/clients/pgvectorscale/cli.py +108 -0
  17. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +22 -4
  18. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/pinecone/config.py +0 -2
  19. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/pinecone/pinecone.py +34 -36
  20. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/redis/cli.py +8 -0
  21. vectordb_bench-0.0.15/vectordb_bench/backend/clients/redis/config.py +45 -0
  22. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +1 -1
  23. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/runner/mp_runner.py +2 -1
  24. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/cli/cli.py +137 -0
  25. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/cli/vectordbbench.py +4 -1
  26. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/check_results/charts.py +9 -6
  27. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/concurrent/charts.py +3 -6
  28. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/run_test/caseSelector.py +6 -0
  29. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/config/dbCaseConfigs.py +165 -1
  30. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/pages/quries_per_dollar.py +13 -5
  31. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/vdb_benchmark.py +11 -3
  32. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/models.py +13 -3
  33. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +53 -1
  34. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +48 -0
  35. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +29 -1
  36. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +24 -0
  37. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +98 -49
  38. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/getLeaderboardData.py +17 -7
  39. vectordb_bench-0.0.15/vectordb_bench/results/leaderboard.json +1 -0
  40. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench.egg-info/PKG-INFO +33 -3
  41. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench.egg-info/SOURCES.txt +4 -0
  42. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench.egg-info/requires.txt +7 -2
  43. vectordb_bench-0.0.13/vectordb_bench/backend/clients/redis/config.py +0 -14
  44. vectordb_bench-0.0.13/vectordb_bench/results/leaderboard.json +0 -1
  45. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/.devcontainer/Dockerfile +0 -0
  46. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/.devcontainer/devcontainer.json +0 -0
  47. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/.env.example +0 -0
  48. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/.github/workflows/publish_package_on_release.yml +0 -0
  49. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/.github/workflows/pull_request.yml +0 -0
  50. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/.gitignore +0 -0
  51. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/.ruff.toml +0 -0
  52. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/Dockerfile +0 -0
  53. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/LICENSE +0 -0
  54. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/Makefile +0 -0
  55. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/OWNERS +0 -0
  56. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/fig/custom_case_run_test.png +0 -0
  57. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/fig/custom_dataset.png +0 -0
  58. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/install/requirements_py3.11.txt +0 -0
  59. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/install.py +0 -0
  60. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/setup.cfg +0 -0
  61. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/conftest.py +0 -0
  62. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/pytest.ini +0 -0
  63. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/test_bench_runner.py +0 -0
  64. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/test_chroma.py +0 -0
  65. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/test_data_source.py +0 -0
  66. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/test_dataset.py +0 -0
  67. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/test_elasticsearch_cloud.py +0 -0
  68. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/test_models.py +0 -0
  69. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/test_redis.py +0 -0
  70. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/test_utils.py +0 -0
  71. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/tests/ut_cases.py +0 -0
  72. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/__main__.py +0 -0
  73. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/__init__.py +0 -0
  74. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/assembler.py +0 -0
  75. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/cases.py +0 -0
  76. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/aws_opensearch/cli.py +0 -0
  77. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/chroma/chroma.py +0 -0
  78. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  79. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
  80. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
  81. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/memorydb/cli.py +0 -0
  82. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
  83. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/memorydb/memorydb.py +0 -0
  84. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/milvus/cli.py +0 -0
  85. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/milvus/config.py +0 -0
  86. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/milvus/milvus.py +0 -0
  87. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
  88. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
  89. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
  90. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
  91. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -0
  92. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +0 -0
  93. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/redis/redis.py +0 -0
  94. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/test/cli.py +0 -0
  95. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/test/config.py +0 -0
  96. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/test/test.py +0 -0
  97. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -0
  98. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
  99. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
  100. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
  101. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  102. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/data_source.py +0 -0
  103. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/dataset.py +0 -0
  104. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/result_collector.py +0 -0
  105. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/runner/__init__.py +0 -0
  106. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/runner/serial_runner.py +0 -0
  107. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/task_runner.py +0 -0
  108. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/backend/utils.py +0 -0
  109. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/base.py +0 -0
  110. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/cli/__init__.py +0 -0
  111. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/config-files/sample_config.yml +0 -0
  112. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/custom/custom_case.json +0 -0
  113. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/check_results/data.py +0 -0
  114. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
  115. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
  116. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  117. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
  118. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
  119. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
  120. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
  121. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -0
  122. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/custom/displaypPrams.py +0 -0
  123. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/custom/getCustomConfig.py +0 -0
  124. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
  125. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  126. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
  127. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
  128. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
  129. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
  130. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  131. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
  132. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/run_test/submitTask.py +0 -0
  133. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/components/tables/data.py +0 -0
  134. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/config/dbPrices.py +0 -0
  135. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/config/styles.py +0 -0
  136. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/pages/concurrent.py +0 -0
  137. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/pages/custom.py +0 -0
  138. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/pages/run_test.py +0 -0
  139. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/pages/tables.py +0 -0
  140. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/frontend/utils.py +0 -0
  141. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/interface.py +0 -0
  142. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/log_util.py +0 -0
  143. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/metric.py +0 -0
  144. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  145. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  146. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
  147. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
  148. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  149. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  150. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  151. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  152. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  153. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  154. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench/results/dbPrices.json +0 -0
  155. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  156. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench.egg-info/entry_points.txt +0 -0
  157. {vectordb_bench-0.0.13 → vectordb_bench-0.0.15}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vectordb-bench
3
- Version: 0.0.13
3
+ Version: 0.0.15
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -36,7 +36,7 @@ Requires-Dist: pinecone-client; extra == "all"
36
36
  Requires-Dist: weaviate-client; extra == "all"
37
37
  Requires-Dist: elasticsearch; extra == "all"
38
38
  Requires-Dist: pgvector; extra == "all"
39
- Requires-Dist: pgvecto_rs[psycopg3]>=0.2.1; extra == "all"
39
+ Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "all"
40
40
  Requires-Dist: sqlalchemy; extra == "all"
41
41
  Requires-Dist: redis; extra == "all"
42
42
  Requires-Dist: chromadb; extra == "all"
@@ -60,8 +60,12 @@ Provides-Extra: pgvectorscale
60
60
  Requires-Dist: psycopg; extra == "pgvectorscale"
61
61
  Requires-Dist: psycopg-binary; extra == "pgvectorscale"
62
62
  Requires-Dist: pgvector; extra == "pgvectorscale"
63
+ Provides-Extra: pgdiskann
64
+ Requires-Dist: psycopg; extra == "pgdiskann"
65
+ Requires-Dist: psycopg-binary; extra == "pgdiskann"
66
+ Requires-Dist: pgvector; extra == "pgdiskann"
63
67
  Provides-Extra: pgvecto-rs
64
- Requires-Dist: pgvecto_rs[psycopg3]>=0.2.1; extra == "pgvecto-rs"
68
+ Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "pgvecto-rs"
65
69
  Provides-Extra: redis
66
70
  Requires-Dist: redis; extra == "redis"
67
71
  Provides-Extra: memorydb
@@ -112,6 +116,7 @@ All the database client supported
112
116
  | pgvector | `pip install vectordb-bench[pgvector]` |
113
117
  | pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
114
118
  | pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
119
+ | pgdiskann | `pip install vectordb-bench[pgdiskann]` |
115
120
  | redis | `pip install vectordb-bench[redis]` |
116
121
  | memorydb | `pip install vectordb-bench[memorydb]` |
117
122
  | chromadb | `pip install vectordb-bench[chromadb]` |
@@ -191,6 +196,29 @@ Options:
191
196
  --m INTEGER hnsw m
192
197
  --ef-construction INTEGER hnsw ef-construction
193
198
  --ef-search INTEGER hnsw ef-search
199
+ --quantization-type [none|bit|halfvec]
200
+ quantization type for vectors
201
+ --custom-case-name TEXT Custom case name i.e. PerformanceCase1536D50K
202
+ --custom-case-description TEXT Custom name description
203
+ --custom-case-load-timeout INTEGER
204
+ Custom case load timeout [default: 36000]
205
+ --custom-case-optimize-timeout INTEGER
206
+ Custom case optimize timeout [default: 36000]
207
+ --custom-dataset-name TEXT
208
+ Dataset name i.e OpenAI
209
+ --custom-dataset-dir TEXT Dataset directory i.e. openai_medium_500k
210
+ --custom-dataset-size INTEGER Dataset size i.e. 500000
211
+ --custom-dataset-dim INTEGER Dataset dimension
212
+ --custom-dataset-metric-type TEXT
213
+ Dataset distance metric [default: COSINE]
214
+ --custom-dataset-file-count INTEGER
215
+ Dataset file count
216
+ --custom-dataset-use-shuffled / --skip-custom-dataset-use-shuffled
217
+ Use shuffled custom dataset or skip [default: custom-dataset-
218
+ use-shuffled]
219
+ --custom-dataset-with-gt / --skip-custom-dataset-with-gt
220
+ Custom dataset with ground truth or skip [default: custom-dataset-
221
+ with-gt]
194
222
  --help Show this message and exit.
195
223
  ```
196
224
  #### Using a configuration file.
@@ -535,6 +563,8 @@ def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
535
563
  3. Update db_config and db_case_config to match client requirements
536
564
  4. Continue to add new functions for each index config.
537
565
  5. Import the client cli module and command to vectordb_bench/cli/vectordbbench.py (for databases with multiple commands (index configs), this only needs to be done for one command)
566
+ 6. Import the `get_custom_case_config` function from `vectordb_bench/cli/cli.py` and use it to add a new key `custom_case` to the `parameters` variable within the command.
567
+
538
568
 
539
569
  > cli modules with multiple index configs:
540
570
  > - pgvector: vectordb_bench/backend/clients/pgvector/cli.py
@@ -38,6 +38,7 @@ All the database client supported
38
38
  | pgvector | `pip install vectordb-bench[pgvector]` |
39
39
  | pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
40
40
  | pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
41
+ | pgdiskann | `pip install vectordb-bench[pgdiskann]` |
41
42
  | redis | `pip install vectordb-bench[redis]` |
42
43
  | memorydb | `pip install vectordb-bench[memorydb]` |
43
44
  | chromadb | `pip install vectordb-bench[chromadb]` |
@@ -117,6 +118,29 @@ Options:
117
118
  --m INTEGER hnsw m
118
119
  --ef-construction INTEGER hnsw ef-construction
119
120
  --ef-search INTEGER hnsw ef-search
121
+ --quantization-type [none|bit|halfvec]
122
+ quantization type for vectors
123
+ --custom-case-name TEXT Custom case name i.e. PerformanceCase1536D50K
124
+ --custom-case-description TEXT Custom name description
125
+ --custom-case-load-timeout INTEGER
126
+ Custom case load timeout [default: 36000]
127
+ --custom-case-optimize-timeout INTEGER
128
+ Custom case optimize timeout [default: 36000]
129
+ --custom-dataset-name TEXT
130
+ Dataset name i.e OpenAI
131
+ --custom-dataset-dir TEXT Dataset directory i.e. openai_medium_500k
132
+ --custom-dataset-size INTEGER Dataset size i.e. 500000
133
+ --custom-dataset-dim INTEGER Dataset dimension
134
+ --custom-dataset-metric-type TEXT
135
+ Dataset distance metric [default: COSINE]
136
+ --custom-dataset-file-count INTEGER
137
+ Dataset file count
138
+ --custom-dataset-use-shuffled / --skip-custom-dataset-use-shuffled
139
+ Use shuffled custom dataset or skip [default: custom-dataset-
140
+ use-shuffled]
141
+ --custom-dataset-with-gt / --skip-custom-dataset-with-gt
142
+ Custom dataset with ground truth or skip [default: custom-dataset-
143
+ with-gt]
120
144
  --help Show this message and exit.
121
145
  ```
122
146
  #### Using a configuration file.
@@ -461,6 +485,8 @@ def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
461
485
  3. Update db_config and db_case_config to match client requirements
462
486
  4. Continue to add new functions for each index config.
463
487
  5. Import the client cli module and command to vectordb_bench/cli/vectordbbench.py (for databases with multiple commands (index configs), this only needs to be done for one command)
488
+ 6. Import the `get_custom_case_config` function from `vectordb_bench/cli/cli.py` and use it to add a new key `custom_case` to the `parameters` variable within the command.
489
+
464
490
 
465
491
  > cli modules with multiple index configs:
466
492
  > - pgvector: vectordb_bench/backend/clients/pgvector/cli.py
@@ -56,7 +56,7 @@ all = [
56
56
  "weaviate-client",
57
57
  "elasticsearch",
58
58
  "pgvector",
59
- "pgvecto_rs[psycopg3]>=0.2.1",
59
+ "pgvecto_rs[psycopg3]>=0.2.2",
60
60
  "sqlalchemy",
61
61
  "redis",
62
62
  "chromadb",
@@ -72,7 +72,8 @@ weaviate = [ "weaviate-client" ]
72
72
  elastic = [ "elasticsearch" ]
73
73
  pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
74
74
  pgvectorscale = [ "psycopg", "psycopg-binary", "pgvector" ]
75
- pgvecto_rs = [ "pgvecto_rs[psycopg3]>=0.2.1" ]
75
+ pgdiskann = [ "psycopg", "psycopg-binary", "pgvector" ]
76
+ pgvecto_rs = [ "pgvecto_rs[psycopg3]>=0.2.2" ]
76
77
  redis = [ "redis" ]
77
78
  memorydb = [ "memorydb" ]
78
79
  chromadb = [ "chromadb" ]
@@ -37,23 +37,24 @@ class config:
37
37
  K_DEFAULT = 100 # default return top k nearest neighbors during search
38
38
  CUSTOM_CONFIG_DIR = pathlib.Path(__file__).parent.joinpath("custom/custom_case.json")
39
39
 
40
- CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
41
- LOAD_TIMEOUT_DEFAULT = 2.5 * 3600 # 2.5h
42
- LOAD_TIMEOUT_768D_1M = 2.5 * 3600 # 2.5h
43
- LOAD_TIMEOUT_768D_10M = 25 * 3600 # 25h
44
- LOAD_TIMEOUT_768D_100M = 250 * 3600 # 10.41d
40
+ CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
41
+ LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
42
+ LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
43
+ LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
44
+ LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d
45
45
 
46
- LOAD_TIMEOUT_1536D_500K = 2.5 * 3600 # 2.5h
47
- LOAD_TIMEOUT_1536D_5M = 25 * 3600 # 25h
46
+ LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
47
+ LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d
48
48
 
49
- OPTIMIZE_TIMEOUT_DEFAULT = 30 * 60 # 30min
50
- OPTIMIZE_TIMEOUT_768D_1M = 30 * 60 # 30min
51
- OPTIMIZE_TIMEOUT_768D_10M = 5 * 3600 # 5h
52
- OPTIMIZE_TIMEOUT_768D_100M = 50 * 3600 # 50h
49
+ OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
50
+ OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
51
+ OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
52
+ OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d
53
53
 
54
54
 
55
- OPTIMIZE_TIMEOUT_1536D_500K = 15 * 60 # 15min
56
- OPTIMIZE_TIMEOUT_1536D_5M = 2.5 * 3600 # 2.5h
55
+ OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
56
+ OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d
57
+
57
58
  def display(self) -> str:
58
59
  tmp = [
59
60
  i for i in inspect.getmembers(self)
@@ -31,6 +31,7 @@ class DB(Enum):
31
31
  PgVector = "PgVector"
32
32
  PgVectoRS = "PgVectoRS"
33
33
  PgVectorScale = "PgVectorScale"
34
+ PgDiskANN = "PgDiskANN"
34
35
  Redis = "Redis"
35
36
  MemoryDB = "MemoryDB"
36
37
  Chroma = "Chroma"
@@ -77,6 +78,10 @@ class DB(Enum):
77
78
  from .pgvectorscale.pgvectorscale import PgVectorScale
78
79
  return PgVectorScale
79
80
 
81
+ if self == DB.PgDiskANN:
82
+ from .pgdiskann.pgdiskann import PgDiskANN
83
+ return PgDiskANN
84
+
80
85
  if self == DB.Redis:
81
86
  from .redis.redis import Redis
82
87
  return Redis
@@ -132,6 +137,10 @@ class DB(Enum):
132
137
  from .pgvectorscale.config import PgVectorScaleConfig
133
138
  return PgVectorScaleConfig
134
139
 
140
+ if self == DB.PgDiskANN:
141
+ from .pgdiskann.config import PgDiskANNConfig
142
+ return PgDiskANNConfig
143
+
135
144
  if self == DB.Redis:
136
145
  from .redis.config import RedisConfig
137
146
  return RedisConfig
@@ -185,6 +194,10 @@ class DB(Enum):
185
194
  from .pgvectorscale.config import _pgvectorscale_case_config
186
195
  return _pgvectorscale_case_config.get(index_type)
187
196
 
197
+ if self == DB.PgDiskANN:
198
+ from .pgdiskann.config import _pgdiskann_case_config
199
+ return _pgdiskann_case_config.get(index_type)
200
+
188
201
  # DB.Pinecone, DB.Chroma, DB.Redis
189
202
  return EmptyDBCaseConfig
190
203
 
@@ -10,6 +10,8 @@ class MetricType(str, Enum):
10
10
  L2 = "L2"
11
11
  COSINE = "COSINE"
12
12
  IP = "IP"
13
+ HAMMING = "HAMMING"
14
+ JACCARD = "JACCARD"
13
15
 
14
16
 
15
17
  class IndexType(str, Enum):
@@ -3,7 +3,7 @@ from contextlib import contextmanager
3
3
  import time
4
4
  from typing import Iterable, Type
5
5
  from ..api import VectorDB, DBCaseConfig, DBConfig, IndexType
6
- from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig
6
+ from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig, AWSOS_Engine
7
7
  from opensearchpy import OpenSearch
8
8
  from opensearchpy.helpers import bulk
9
9
 
@@ -83,7 +83,7 @@ class AWSOpenSearch(VectorDB):
83
83
 
84
84
  @contextmanager
85
85
  def init(self) -> None:
86
- """connect to elasticsearch"""
86
+ """connect to opensearch"""
87
87
  self.client = OpenSearch(**self.db_config)
88
88
 
89
89
  yield
@@ -97,7 +97,7 @@ class AWSOpenSearch(VectorDB):
97
97
  metadata: list[int],
98
98
  **kwargs,
99
99
  ) -> tuple[int, Exception]:
100
- """Insert the embeddings to the elasticsearch."""
100
+ """Insert the embeddings to the opensearch."""
101
101
  assert self.client is not None, "should self.init() first"
102
102
 
103
103
  insert_data = []
@@ -136,13 +136,15 @@ class AWSOpenSearch(VectorDB):
136
136
  body = {
137
137
  "size": k,
138
138
  "query": {"knn": {self.vector_col_name: {"vector": query, "k": k}}},
139
+ **({"filter": {"range": {self.id_col_name: {"gt": filters["id"]}}}} if filters else {})
139
140
  }
140
141
  try:
141
- resp = self.client.search(index=self.index_name, body=body)
142
+ resp = self.client.search(index=self.index_name, body=body,size=k,_source=False,docvalue_fields=[self.id_col_name],stored_fields="_none_",filter_path=[f"hits.hits.fields.{self.id_col_name}"],)
142
143
  log.info(f'Search took: {resp["took"]}')
143
144
  log.info(f'Search shards: {resp["_shards"]}')
144
145
  log.info(f'Search hits total: {resp["hits"]["total"]}')
145
- result = [int(d["_id"]) for d in resp["hits"]["hits"]]
146
+ result = [h["fields"][self.id_col_name][0] for h in resp["hits"]["hits"]]
147
+ #result = [int(d["_id"]) for d in resp["hits"]["hits"]]
146
148
  # log.info(f'success! length={len(res)}')
147
149
 
148
150
  return result
@@ -152,7 +154,46 @@ class AWSOpenSearch(VectorDB):
152
154
 
153
155
  def optimize(self):
154
156
  """optimize will be called between insertion and search in performance cases."""
155
- pass
157
+ # Call refresh first to ensure that all segments are created
158
+ self._refresh_index()
159
+ self._do_force_merge()
160
+ # Call refresh again to ensure that the index is ready after force merge.
161
+ self._refresh_index()
162
+ # ensure that all graphs are loaded in memory and ready for search
163
+ self._load_graphs_to_memory()
164
+
165
+ def _refresh_index(self):
166
+ log.debug(f"Starting refresh for index {self.index_name}")
167
+ SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC = 30
168
+ while True:
169
+ try:
170
+ log.info(f"Starting the Refresh Index..")
171
+ self.client.indices.refresh(index=self.index_name)
172
+ break
173
+ except Exception as e:
174
+ log.info(
175
+ f"Refresh errored out. Sleeping for {SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC} sec and then Retrying : {e}")
176
+ time.sleep(SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC)
177
+ continue
178
+ log.debug(f"Completed refresh for index {self.index_name}")
179
+
180
+ def _do_force_merge(self):
181
+ log.debug(f"Starting force merge for index {self.index_name}")
182
+ force_merge_endpoint = f'/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false'
183
+ force_merge_task_id = self.client.transport.perform_request('POST', force_merge_endpoint)['task']
184
+ SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC = 30
185
+ while True:
186
+ time.sleep(SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC)
187
+ task_status = self.client.tasks.get(task_id=force_merge_task_id)
188
+ if task_status['completed']:
189
+ break
190
+ log.debug(f"Completed force merge for index {self.index_name}")
191
+
192
+ def _load_graphs_to_memory(self):
193
+ if self.case_config.engine != AWSOS_Engine.lucene:
194
+ log.info("Calling warmup API to load graphs into memory")
195
+ warmup_endpoint = f'/_plugins/_knn/warmup/{self.index_name}'
196
+ self.client.transport.perform_request('GET', warmup_endpoint)
156
197
 
157
198
  def ready_to_load(self):
158
199
  """ready_to_load will be called before load in load cases."""
@@ -1,9 +1,10 @@
1
+ import logging
1
2
  from enum import Enum
2
3
  from pydantic import SecretStr, BaseModel
3
4
 
4
5
  from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
5
6
 
6
-
7
+ log = logging.getLogger(__name__)
7
8
  class AWSOpenSearchConfig(DBConfig, BaseModel):
8
9
  host: str = ""
9
10
  port: int = 443
@@ -31,14 +32,18 @@ class AWSOS_Engine(Enum):
31
32
 
32
33
  class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
33
34
  metric_type: MetricType = MetricType.L2
34
- engine: AWSOS_Engine = AWSOS_Engine.nmslib
35
- efConstruction: int = 360
36
- M: int = 30
35
+ engine: AWSOS_Engine = AWSOS_Engine.faiss
36
+ efConstruction: int = 256
37
+ efSearch: int = 256
38
+ M: int = 16
37
39
 
38
40
  def parse_metric(self) -> str:
39
41
  if self.metric_type == MetricType.IP:
40
- return "innerproduct" # only support faiss / nmslib, not for Lucene.
42
+ return "innerproduct"
41
43
  elif self.metric_type == MetricType.COSINE:
44
+ if self.engine == AWSOS_Engine.faiss:
45
+ log.info(f"Using metric type as innerproduct because faiss doesn't support cosine as metric type for Opensearch")
46
+ return "innerproduct"
42
47
  return "cosinesimil"
43
48
  return "l2"
44
49
 
@@ -49,7 +54,8 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
49
54
  "engine": self.engine.value,
50
55
  "parameters": {
51
56
  "ef_construction": self.efConstruction,
52
- "m": self.M
57
+ "m": self.M,
58
+ "ef_search": self.efSearch
53
59
  }
54
60
  }
55
61
  return params
@@ -40,12 +40,12 @@ def create_index(client, index_name):
40
40
  "type": "knn_vector",
41
41
  "dimension": _DIM,
42
42
  "method": {
43
- "engine": "nmslib",
43
+ "engine": "faiss",
44
44
  "name": "hnsw",
45
45
  "space_type": "l2",
46
46
  "parameters": {
47
- "ef_construction": 128,
48
- "m": 24,
47
+ "ef_construction": 256,
48
+ "m": 16,
49
49
  }
50
50
  }
51
51
  }
@@ -108,12 +108,43 @@ def search(client, index_name):
108
108
  print('\nSearch not ready, sleep 1s')
109
109
  time.sleep(1)
110
110
 
111
+ def optimize_index(client, index_name):
112
+ print(f"Starting force merge for index {index_name}")
113
+ force_merge_endpoint = f'/{index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false'
114
+ force_merge_task_id = client.transport.perform_request('POST', force_merge_endpoint)['task']
115
+ SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC = 30
116
+ while True:
117
+ time.sleep(SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC)
118
+ task_status = client.tasks.get(task_id=force_merge_task_id)
119
+ if task_status['completed']:
120
+ break
121
+ print(f"Completed force merge for index {index_name}")
122
+
123
+
124
+ def refresh_index(client, index_name):
125
+ print(f"Starting refresh for index {index_name}")
126
+ SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC = 30
127
+ while True:
128
+ try:
129
+ print(f"Starting the Refresh Index..")
130
+ client.indices.refresh(index=index_name)
131
+ break
132
+ except Exception as e:
133
+ print(
134
+ f"Refresh errored out. Sleeping for {SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC} sec and then Retrying : {e}")
135
+ time.sleep(SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC)
136
+ continue
137
+ print(f"Completed refresh for index {index_name}")
138
+
139
+
111
140
 
112
141
  def main():
113
142
  client = create_client()
114
143
  try:
115
144
  create_index(client, _INDEX_NAME)
116
145
  bulk_insert(client, _INDEX_NAME)
146
+ optimize_index(client, _INDEX_NAME)
147
+ refresh_index(client, _INDEX_NAME)
117
148
  search(client, _INDEX_NAME)
118
149
  delete_index(client, _INDEX_NAME)
119
150
  except Exception as e:
@@ -0,0 +1,99 @@
1
+ import click
2
+ import os
3
+ from pydantic import SecretStr
4
+
5
+ from ....cli.cli import (
6
+ CommonTypedDict,
7
+ cli,
8
+ click_parameter_decorators_from_typed_dict,
9
+ run,
10
+ )
11
+ from typing import Annotated, Optional, Unpack
12
+ from vectordb_bench.backend.clients import DB
13
+
14
+
15
+ class PgDiskAnnTypedDict(CommonTypedDict):
16
+ user_name: Annotated[
17
+ str, click.option("--user-name", type=str, help="Db username", required=True)
18
+ ]
19
+ password: Annotated[
20
+ str,
21
+ click.option("--password",
22
+ type=str,
23
+ help="Postgres database password",
24
+ default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
25
+ show_default="$POSTGRES_PASSWORD",
26
+ ),
27
+ ]
28
+
29
+ host: Annotated[
30
+ str, click.option("--host", type=str, help="Db host", required=True)
31
+ ]
32
+ db_name: Annotated[
33
+ str, click.option("--db-name", type=str, help="Db name", required=True)
34
+ ]
35
+ max_neighbors: Annotated[
36
+ int,
37
+ click.option(
38
+ "--max-neighbors", type=int, help="PgDiskAnn max neighbors",
39
+ ),
40
+ ]
41
+ l_value_ib: Annotated[
42
+ int,
43
+ click.option(
44
+ "--l-value-ib", type=int, help="PgDiskAnn l_value_ib",
45
+ ),
46
+ ]
47
+ l_value_is: Annotated[
48
+ float,
49
+ click.option(
50
+ "--l-value-is", type=float, help="PgDiskAnn l_value_is",
51
+ ),
52
+ ]
53
+ maintenance_work_mem: Annotated[
54
+ Optional[str],
55
+ click.option(
56
+ "--maintenance-work-mem",
57
+ type=str,
58
+ help="Sets the maximum memory to be used for maintenance operations (index creation). "
59
+ "Can be entered as string with unit like '64GB' or as an integer number of KB."
60
+ "This will set the parameters: max_parallel_maintenance_workers,"
61
+ " max_parallel_workers & table(parallel_workers)",
62
+ required=False,
63
+ ),
64
+ ]
65
+ max_parallel_workers: Annotated[
66
+ Optional[int],
67
+ click.option(
68
+ "--max-parallel-workers",
69
+ type=int,
70
+ help="Sets the maximum number of parallel processes per maintenance operation (index creation)",
71
+ required=False,
72
+ ),
73
+ ]
74
+
75
+ @cli.command()
76
+ @click_parameter_decorators_from_typed_dict(PgDiskAnnTypedDict)
77
+ def PgDiskAnn(
78
+ **parameters: Unpack[PgDiskAnnTypedDict],
79
+ ):
80
+ from .config import PgDiskANNConfig, PgDiskANNImplConfig
81
+
82
+ run(
83
+ db=DB.PgDiskANN,
84
+ db_config=PgDiskANNConfig(
85
+ db_label=parameters["db_label"],
86
+ user_name=SecretStr(parameters["user_name"]),
87
+ password=SecretStr(parameters["password"]),
88
+ host=parameters["host"],
89
+ db_name=parameters["db_name"],
90
+ ),
91
+ db_case_config=PgDiskANNImplConfig(
92
+ max_neighbors=parameters["max_neighbors"],
93
+ l_value_ib=parameters["l_value_ib"],
94
+ l_value_is=parameters["l_value_is"],
95
+ max_parallel_workers=parameters["max_parallel_workers"],
96
+ maintenance_work_mem=parameters["maintenance_work_mem"],
97
+ ),
98
+ **parameters,
99
+ )
@@ -0,0 +1,145 @@
1
+ from abc import abstractmethod
2
+ from typing import Any, Mapping, Optional, Sequence, TypedDict
3
+ from pydantic import BaseModel, SecretStr
4
+ from typing_extensions import LiteralString
5
+ from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
6
+
7
+ POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
8
+
9
+
10
+ class PgDiskANNConfigDict(TypedDict):
11
+ """These keys will be directly used as kwargs in psycopg connection string,
12
+ so the names must match exactly psycopg API"""
13
+
14
+ user: str
15
+ password: str
16
+ host: str
17
+ port: int
18
+ dbname: str
19
+
20
+
21
+ class PgDiskANNConfig(DBConfig):
22
+ user_name: SecretStr = SecretStr("postgres")
23
+ password: SecretStr
24
+ host: str = "localhost"
25
+ port: int = 5432
26
+ db_name: str
27
+
28
+ def to_dict(self) -> PgDiskANNConfigDict:
29
+ user_str = self.user_name.get_secret_value()
30
+ pwd_str = self.password.get_secret_value()
31
+ return {
32
+ "host": self.host,
33
+ "port": self.port,
34
+ "dbname": self.db_name,
35
+ "user": user_str,
36
+ "password": pwd_str,
37
+ }
38
+
39
+
40
+ class PgDiskANNIndexConfig(BaseModel, DBCaseConfig):
41
+ metric_type: MetricType | None = None
42
+ create_index_before_load: bool = False
43
+ create_index_after_load: bool = True
44
+ maintenance_work_mem: Optional[str]
45
+ max_parallel_workers: Optional[int]
46
+
47
+ def parse_metric(self) -> str:
48
+ if self.metric_type == MetricType.L2:
49
+ return "vector_l2_ops"
50
+ elif self.metric_type == MetricType.IP:
51
+ return "vector_ip_ops"
52
+ return "vector_cosine_ops"
53
+
54
+ def parse_metric_fun_op(self) -> LiteralString:
55
+ if self.metric_type == MetricType.L2:
56
+ return "<->"
57
+ elif self.metric_type == MetricType.IP:
58
+ return "<#>"
59
+ return "<=>"
60
+
61
+ def parse_metric_fun_str(self) -> str:
62
+ if self.metric_type == MetricType.L2:
63
+ return "l2_distance"
64
+ elif self.metric_type == MetricType.IP:
65
+ return "max_inner_product"
66
+ return "cosine_distance"
67
+
68
+ @abstractmethod
69
+ def index_param(self) -> dict:
70
+ ...
71
+
72
+ @abstractmethod
73
+ def search_param(self) -> dict:
74
+ ...
75
+
76
+ @abstractmethod
77
+ def session_param(self) -> dict:
78
+ ...
79
+
80
+ @staticmethod
81
+ def _optionally_build_with_options(with_options: Mapping[str, Any]) -> Sequence[dict[str, Any]]:
82
+ """Walk through mappings, creating a List of {key1 = value} pairs. That will be used to build a where clause"""
83
+ options = []
84
+ for option_name, value in with_options.items():
85
+ if value is not None:
86
+ options.append(
87
+ {
88
+ "option_name": option_name,
89
+ "val": str(value),
90
+ }
91
+ )
92
+ return options
93
+
94
+ @staticmethod
95
+ def _optionally_build_set_options(
96
+ set_mapping: Mapping[str, Any]
97
+ ) -> Sequence[dict[str, Any]]:
98
+ """Walk through options, creating 'SET 'key1 = "value1";' list"""
99
+ session_options = []
100
+ for setting_name, value in set_mapping.items():
101
+ if value:
102
+ session_options.append(
103
+ {"parameter": {
104
+ "setting_name": setting_name,
105
+ "val": str(value),
106
+ },
107
+ }
108
+ )
109
+ return session_options
110
+
111
+
112
+ class PgDiskANNImplConfig(PgDiskANNIndexConfig):
113
+ index: IndexType = IndexType.DISKANN
114
+ max_neighbors: int | None
115
+ l_value_ib: int | None
116
+ l_value_is: float | None
117
+ maintenance_work_mem: Optional[str] = None
118
+ max_parallel_workers: Optional[int] = None
119
+
120
+ def index_param(self) -> dict:
121
+ return {
122
+ "metric": self.parse_metric(),
123
+ "index_type": self.index.value,
124
+ "options": {
125
+ "max_neighbors": self.max_neighbors,
126
+ "l_value_ib": self.l_value_ib,
127
+ },
128
+ "maintenance_work_mem": self.maintenance_work_mem,
129
+ "max_parallel_workers": self.max_parallel_workers,
130
+ }
131
+
132
+ def search_param(self) -> dict:
133
+ return {
134
+ "metric": self.parse_metric(),
135
+ "metric_fun_op": self.parse_metric_fun_op(),
136
+ }
137
+
138
+ def session_param(self) -> dict:
139
+ return {
140
+ "diskann.l_value_is": self.l_value_is,
141
+ }
142
+
143
+ _pgdiskann_case_config = {
144
+ IndexType.DISKANN: PgDiskANNImplConfig,
145
+ }