vectordb-bench 0.0.21__tar.gz → 0.0.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/.gitignore +3 -1
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/PKG-INFO +67 -3
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/README.md +57 -1
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/install/requirements_py3.11.txt +1 -1
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/install.py +2 -1
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/pyproject.toml +6 -2
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/__init__.py +48 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/api.py +1 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +53 -4
- vectordb_bench-0.0.23/vectordb_bench/backend/clients/aws_opensearch/cli.py +125 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/aws_opensearch/config.py +10 -0
- vectordb_bench-0.0.23/vectordb_bench/backend/clients/mariadb/cli.py +107 -0
- vectordb_bench-0.0.23/vectordb_bench/backend/clients/mariadb/config.py +71 -0
- vectordb_bench-0.0.23/vectordb_bench/backend/clients/mariadb/mariadb.py +214 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/milvus/cli.py +50 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/milvus/config.py +33 -0
- vectordb_bench-0.0.23/vectordb_bench/backend/clients/mongodb/config.py +53 -0
- vectordb_bench-0.0.23/vectordb_bench/backend/clients/mongodb/mongodb.py +200 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgvector/cli.py +13 -1
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgvector/config.py +22 -5
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgvector/pgvector.py +62 -19
- vectordb_bench-0.0.23/vectordb_bench/backend/clients/tidb/cli.py +98 -0
- vectordb_bench-0.0.23/vectordb_bench/backend/clients/tidb/config.py +49 -0
- vectordb_bench-0.0.23/vectordb_bench/backend/clients/tidb/tidb.py +234 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/cli/vectordbbench.py +4 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/custom/displaypPrams.py +12 -1
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/run_test/submitTask.py +20 -3
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/config/dbCaseConfigs.py +128 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/config/styles.py +2 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/log_util.py +15 -2
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/models.py +7 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench.egg-info/PKG-INFO +67 -3
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench.egg-info/SOURCES.txt +8 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench.egg-info/requires.txt +12 -1
- vectordb_bench-0.0.21/vectordb_bench/backend/clients/aws_opensearch/cli.py +0 -41
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/.devcontainer/Dockerfile +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/.devcontainer/devcontainer.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/.env.example +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/.github/workflows/publish_package_on_release.yml +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/.github/workflows/pull_request.yml +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/Dockerfile +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/LICENSE +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/Makefile +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/OWNERS +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/fig/custom_case_run_test.png +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/fig/custom_dataset.png +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/setup.cfg +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/conftest.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/pytest.ini +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/test_bench_runner.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/test_chroma.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/test_data_source.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/test_dataset.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/test_elasticsearch_cloud.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/test_models.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/test_rate_runner.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/test_redis.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/test_utils.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/tests/ut_cases.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/__init__.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/__main__.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/__init__.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/assembler.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/cases.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/aliyun_opensearch/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/alloydb/alloydb.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/alloydb/cli.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/alloydb/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/chroma/chroma.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/chroma/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/memorydb/cli.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/memorydb/memorydb.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/milvus/milvus.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgdiskann/cli.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgdiskann/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgvectorscale/cli.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/redis/cli.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/redis/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/redis/redis.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/test/cli.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/test/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/test/test.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/data_source.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/dataset.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/result_collector.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/runner/__init__.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/runner/mp_runner.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/runner/rate_runner.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/runner/read_write_runner.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/runner/serial_runner.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/runner/util.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/task_runner.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/backend/utils.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/base.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/cli/__init__.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/cli/cli.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/config-files/sample_config.yml +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/custom/custom_case.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/check_results/data.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/concurrent/charts.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/custom/getCustomConfig.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/components/tables/data.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/config/dbPrices.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/pages/concurrent.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/pages/custom.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/pages/quries_per_dollar.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/pages/run_test.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/pages/tables.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/utils.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/frontend/vdb_benchmark.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/interface.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/metric.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/dbPrices.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/getLeaderboardData.py +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench/results/leaderboard.json +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench.egg-info/dependency_links.txt +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench.egg-info/entry_points.txt +0 -0
- {vectordb_bench-0.0.21 → vectordb_bench-0.0.23}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.21
|
3
|
+
Version: 0.0.23
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -21,7 +21,7 @@ Requires-Dist: oss2
|
|
21
21
|
Requires-Dist: psutil
|
22
22
|
Requires-Dist: polars
|
23
23
|
Requires-Dist: plotly
|
24
|
-
Requires-Dist: environs
|
24
|
+
Requires-Dist: environs<14.1.0
|
25
25
|
Requires-Dist: pydantic<v2
|
26
26
|
Requires-Dist: scikit-learn
|
27
27
|
Requires-Dist: pymilvus
|
@@ -48,6 +48,8 @@ Requires-Dist: opensearch-py; extra == "all"
|
|
48
48
|
Requires-Dist: memorydb; extra == "all"
|
49
49
|
Requires-Dist: alibabacloud_ha3engine_vector; extra == "all"
|
50
50
|
Requires-Dist: alibabacloud_searchengine20211025; extra == "all"
|
51
|
+
Requires-Dist: mariadb; extra == "all"
|
52
|
+
Requires-Dist: PyMySQL; extra == "all"
|
51
53
|
Provides-Extra: qdrant
|
52
54
|
Requires-Dist: qdrant-client; extra == "qdrant"
|
53
55
|
Provides-Extra: pinecone
|
@@ -73,6 +75,12 @@ Requires-Dist: opensearch-py; extra == "opensearch"
|
|
73
75
|
Provides-Extra: aliyun-opensearch
|
74
76
|
Requires-Dist: alibabacloud_ha3engine_vector; extra == "aliyun-opensearch"
|
75
77
|
Requires-Dist: alibabacloud_searchengine20211025; extra == "aliyun-opensearch"
|
78
|
+
Provides-Extra: mongodb
|
79
|
+
Requires-Dist: pymongo; extra == "mongodb"
|
80
|
+
Provides-Extra: mariadb
|
81
|
+
Requires-Dist: mariadb; extra == "mariadb"
|
82
|
+
Provides-Extra: tidb
|
83
|
+
Requires-Dist: PyMySQL; extra == "tidb"
|
76
84
|
|
77
85
|
# VectorDBBench: A Benchmark Tool for VectorDB
|
78
86
|
|
@@ -89,6 +97,8 @@ Closely mimicking real-world production environments, we've set up diverse testi
|
|
89
97
|
|
90
98
|
Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
|
91
99
|
|
100
|
+
VectorDBBench is sponsored by Zilliz, the leading open-source vector database company behind Milvus. Choose smarter with VectorDBBench — start your free test on [Zilliz Cloud](https://zilliz.com/) today!
|
101
|
+
|
92
102
|
**Leaderboard:** https://zilliz.com/benchmark
|
93
103
|
## Quick Start
|
94
104
|
### Prerequirement
|
@@ -128,6 +138,8 @@ All the database client supported
|
|
128
138
|
| chromadb | `pip install vectordb-bench[chromadb]` |
|
129
139
|
| awsopensearch | `pip install vectordb-bench[opensearch]` |
|
130
140
|
| aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
|
141
|
+
| mongodb | `pip install vectordb-bench[mongodb]` |
|
142
|
+
| tidb | `pip install vectordb-bench[tidb]` |
|
131
143
|
|
132
144
|
### Run
|
133
145
|
|
@@ -204,7 +216,11 @@ Options:
|
|
204
216
|
--ef-construction INTEGER hnsw ef-construction
|
205
217
|
--ef-search INTEGER hnsw ef-search
|
206
218
|
--quantization-type [none|bit|halfvec]
|
207
|
-
quantization type for vectors
|
219
|
+
quantization type for vectors (in index)
|
220
|
+
--table-quantization-type [none|bit|halfvec]
|
221
|
+
quantization type for vectors (in table). If
|
222
|
+
equal to bit, the parameter
|
223
|
+
quantization_type will be set to bit too.
|
208
224
|
--custom-case-name TEXT Custom case name i.e. PerformanceCase1536D50K
|
209
225
|
--custom-case-description TEXT Custom name description
|
210
226
|
--custom-case-load-timeout INTEGER
|
@@ -228,6 +244,47 @@ Options:
|
|
228
244
|
with-gt]
|
229
245
|
--help Show this message and exit.
|
230
246
|
```
|
247
|
+
|
248
|
+
### Run awsopensearch from command line
|
249
|
+
|
250
|
+
```shell
|
251
|
+
vectordbbench awsopensearch --db-label awsopensearch \
|
252
|
+
--m 16 --ef-construction 256 \
|
253
|
+
--host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
|
254
|
+
--user vector --password '<password>' \
|
255
|
+
--case-type Performance1536D5M --num-insert-workers 10 \
|
256
|
+
--skip-load --num-concurrency 75
|
257
|
+
```
|
258
|
+
|
259
|
+
To list the options for awsopensearch, execute `vectordbbench awsopensearch --help`
|
260
|
+
|
261
|
+
```text
|
262
|
+
$ vectordbbench awsopensearch --help
|
263
|
+
Usage: vectordbbench awsopensearch [OPTIONS]
|
264
|
+
|
265
|
+
Options:
|
266
|
+
# Sharding and Replication
|
267
|
+
--number-of-shards INTEGER Number of primary shards for the index
|
268
|
+
--number-of-replicas INTEGER Number of replica copies for each primary
|
269
|
+
shard
|
270
|
+
# Indexing Performance
|
271
|
+
--index-thread-qty INTEGER Thread count for native engine indexing
|
272
|
+
--index-thread-qty-during-force-merge INTEGER
|
273
|
+
Thread count during force merge operations
|
274
|
+
--number-of-indexing-clients INTEGER
|
275
|
+
Number of concurrent indexing clients
|
276
|
+
# Index Management
|
277
|
+
--number-of-segments INTEGER Target number of segments after merging
|
278
|
+
--refresh-interval TEXT How often to make new data available for
|
279
|
+
search
|
280
|
+
--force-merge-enabled BOOLEAN Whether to perform force merge operation
|
281
|
+
--flush-threshold-size TEXT Size threshold for flushing the transaction
|
282
|
+
log
|
283
|
+
# Memory Management
|
284
|
+
--cb-threshold TEXT k-NN Memory circuit breaker threshold
|
285
|
+
|
286
|
+
--help Show this message and exit.```
|
287
|
+
|
231
288
|
#### Using a configuration file.
|
232
289
|
|
233
290
|
The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
|
@@ -394,6 +451,13 @@ We have strict requirements for the data set format, please follow them.
|
|
394
451
|
- `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
|
395
452
|
- Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
396
453
|
- Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
454
|
+
- We recommend limiting the number of test query vectors, for example to 1,000.
|
455
|
+
When conducting concurrent query tests, VectorDBBench creates a large number of processes.
|
456
|
+
To minimize additional communication overhead during testing,
|
457
|
+
we prepare a complete set of test queries for each process, allowing them to run independently.
|
458
|
+
However, this means that as the number of concurrent processes increases,
|
459
|
+
the number of copied query vectors also increases significantly,
|
460
|
+
which can place substantial pressure on memory resources.
|
397
461
|
- Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
|
398
462
|
|
399
463
|
- `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
|
@@ -13,6 +13,8 @@ Closely mimicking real-world production environments, we've set up diverse testi
|
|
13
13
|
|
14
14
|
Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
|
15
15
|
|
16
|
+
VectorDBBench is sponsored by Zilliz, the leading open-source vector database company behind Milvus. Choose smarter with VectorDBBench — start your free test on [Zilliz Cloud](https://zilliz.com/) today!
|
17
|
+
|
16
18
|
**Leaderboard:** https://zilliz.com/benchmark
|
17
19
|
## Quick Start
|
18
20
|
### Prerequirement
|
@@ -52,6 +54,8 @@ All the database client supported
|
|
52
54
|
| chromadb | `pip install vectordb-bench[chromadb]` |
|
53
55
|
| awsopensearch | `pip install vectordb-bench[opensearch]` |
|
54
56
|
| aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
|
57
|
+
| mongodb | `pip install vectordb-bench[mongodb]` |
|
58
|
+
| tidb | `pip install vectordb-bench[tidb]` |
|
55
59
|
|
56
60
|
### Run
|
57
61
|
|
@@ -128,7 +132,11 @@ Options:
|
|
128
132
|
--ef-construction INTEGER hnsw ef-construction
|
129
133
|
--ef-search INTEGER hnsw ef-search
|
130
134
|
--quantization-type [none|bit|halfvec]
|
131
|
-
quantization type for vectors
|
135
|
+
quantization type for vectors (in index)
|
136
|
+
--table-quantization-type [none|bit|halfvec]
|
137
|
+
quantization type for vectors (in table). If
|
138
|
+
equal to bit, the parameter
|
139
|
+
quantization_type will be set to bit too.
|
132
140
|
--custom-case-name TEXT Custom case name i.e. PerformanceCase1536D50K
|
133
141
|
--custom-case-description TEXT Custom name description
|
134
142
|
--custom-case-load-timeout INTEGER
|
@@ -152,6 +160,47 @@ Options:
|
|
152
160
|
with-gt]
|
153
161
|
--help Show this message and exit.
|
154
162
|
```
|
163
|
+
|
164
|
+
### Run awsopensearch from command line
|
165
|
+
|
166
|
+
```shell
|
167
|
+
vectordbbench awsopensearch --db-label awsopensearch \
|
168
|
+
--m 16 --ef-construction 256 \
|
169
|
+
--host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
|
170
|
+
--user vector --password '<password>' \
|
171
|
+
--case-type Performance1536D5M --num-insert-workers 10 \
|
172
|
+
--skip-load --num-concurrency 75
|
173
|
+
```
|
174
|
+
|
175
|
+
To list the options for awsopensearch, execute `vectordbbench awsopensearch --help`
|
176
|
+
|
177
|
+
```text
|
178
|
+
$ vectordbbench awsopensearch --help
|
179
|
+
Usage: vectordbbench awsopensearch [OPTIONS]
|
180
|
+
|
181
|
+
Options:
|
182
|
+
# Sharding and Replication
|
183
|
+
--number-of-shards INTEGER Number of primary shards for the index
|
184
|
+
--number-of-replicas INTEGER Number of replica copies for each primary
|
185
|
+
shard
|
186
|
+
# Indexing Performance
|
187
|
+
--index-thread-qty INTEGER Thread count for native engine indexing
|
188
|
+
--index-thread-qty-during-force-merge INTEGER
|
189
|
+
Thread count during force merge operations
|
190
|
+
--number-of-indexing-clients INTEGER
|
191
|
+
Number of concurrent indexing clients
|
192
|
+
# Index Management
|
193
|
+
--number-of-segments INTEGER Target number of segments after merging
|
194
|
+
--refresh-interval TEXT How often to make new data available for
|
195
|
+
search
|
196
|
+
--force-merge-enabled BOOLEAN Whether to perform force merge operation
|
197
|
+
--flush-threshold-size TEXT Size threshold for flushing the transaction
|
198
|
+
log
|
199
|
+
# Memory Management
|
200
|
+
--cb-threshold TEXT k-NN Memory circuit breaker threshold
|
201
|
+
|
202
|
+
--help Show this message and exit.```
|
203
|
+
|
155
204
|
#### Using a configuration file.
|
156
205
|
|
157
206
|
The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
|
@@ -318,6 +367,13 @@ We have strict requirements for the data set format, please follow them.
|
|
318
367
|
- `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
|
319
368
|
- Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
320
369
|
- Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
370
|
+
- We recommend limiting the number of test query vectors, for example to 1,000.
|
371
|
+
When conducting concurrent query tests, VectorDBBench creates a large number of processes.
|
372
|
+
To minimize additional communication overhead during testing,
|
373
|
+
we prepare a complete set of test queries for each process, allowing them to run independently.
|
374
|
+
However, this means that as the number of concurrent processes increases,
|
375
|
+
the number of copied query vectors also increases significantly,
|
376
|
+
which can place substantial pressure on memory resources.
|
321
377
|
- Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
|
322
378
|
|
323
379
|
- `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
|
@@ -35,7 +35,7 @@ dependencies = [
|
|
35
35
|
"psutil",
|
36
36
|
"polars",
|
37
37
|
"plotly",
|
38
|
-
"environs",
|
38
|
+
"environs<14.1.0",
|
39
39
|
"pydantic<v2",
|
40
40
|
"scikit-learn",
|
41
41
|
"pymilvus", # with pandas, numpy, ujson
|
@@ -68,6 +68,8 @@ all = [
|
|
68
68
|
"memorydb",
|
69
69
|
"alibabacloud_ha3engine_vector",
|
70
70
|
"alibabacloud_searchengine20211025",
|
71
|
+
"mariadb",
|
72
|
+
"PyMySQL",
|
71
73
|
]
|
72
74
|
|
73
75
|
qdrant = [ "qdrant-client" ]
|
@@ -85,6 +87,9 @@ memorydb = [ "memorydb" ]
|
|
85
87
|
chromadb = [ "chromadb" ]
|
86
88
|
opensearch = [ "opensearch-py" ]
|
87
89
|
aliyun_opensearch = [ "alibabacloud_ha3engine_vector", "alibabacloud_searchengine20211025"]
|
90
|
+
mongodb = [ "pymongo" ]
|
91
|
+
mariadb = [ "mariadb" ]
|
92
|
+
tidb = [ "PyMySQL" ]
|
88
93
|
|
89
94
|
[project.urls]
|
90
95
|
"repository" = "https://github.com/zilliztech/VectorDBBench"
|
@@ -207,4 +212,3 @@ builtins-ignorelist = [
|
|
207
212
|
# "dict", # TODO
|
208
213
|
# "filter",
|
209
214
|
]
|
210
|
-
|
@@ -38,8 +38,11 @@ class DB(Enum):
|
|
38
38
|
Chroma = "Chroma"
|
39
39
|
AWSOpenSearch = "OpenSearch"
|
40
40
|
AliyunElasticsearch = "AliyunElasticsearch"
|
41
|
+
MariaDB = "MariaDB"
|
41
42
|
Test = "test"
|
42
43
|
AliyunOpenSearch = "AliyunOpenSearch"
|
44
|
+
MongoDB = "MongoDB"
|
45
|
+
TiDB = "TiDB"
|
43
46
|
|
44
47
|
@property
|
45
48
|
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901
|
@@ -129,6 +132,21 @@ class DB(Enum):
|
|
129
132
|
|
130
133
|
return AliyunOpenSearch
|
131
134
|
|
135
|
+
if self == DB.MongoDB:
|
136
|
+
from .mongodb.mongodb import MongoDB
|
137
|
+
|
138
|
+
return MongoDB
|
139
|
+
|
140
|
+
if self == DB.MariaDB:
|
141
|
+
from .mariadb.mariadb import MariaDB
|
142
|
+
|
143
|
+
return MariaDB
|
144
|
+
|
145
|
+
if self == DB.TiDB:
|
146
|
+
from .tidb.tidb import TiDB
|
147
|
+
|
148
|
+
return TiDB
|
149
|
+
|
132
150
|
if self == DB.Test:
|
133
151
|
from .test.test import Test
|
134
152
|
|
@@ -225,6 +243,21 @@ class DB(Enum):
|
|
225
243
|
|
226
244
|
return AliyunOpenSearchConfig
|
227
245
|
|
246
|
+
if self == DB.MongoDB:
|
247
|
+
from .mongodb.config import MongoDBConfig
|
248
|
+
|
249
|
+
return MongoDBConfig
|
250
|
+
|
251
|
+
if self == DB.MariaDB:
|
252
|
+
from .mariadb.config import MariaDBConfig
|
253
|
+
|
254
|
+
return MariaDBConfig
|
255
|
+
|
256
|
+
if self == DB.TiDB:
|
257
|
+
from .tidb.config import TiDBConfig
|
258
|
+
|
259
|
+
return TiDBConfig
|
260
|
+
|
228
261
|
if self == DB.Test:
|
229
262
|
from .test.config import TestConfig
|
230
263
|
|
@@ -302,6 +335,21 @@ class DB(Enum):
|
|
302
335
|
|
303
336
|
return AliyunOpenSearchIndexConfig
|
304
337
|
|
338
|
+
if self == DB.MongoDB:
|
339
|
+
from .mongodb.config import MongoDBIndexConfig
|
340
|
+
|
341
|
+
return MongoDBIndexConfig
|
342
|
+
|
343
|
+
if self == DB.MariaDB:
|
344
|
+
from .mariadb.config import _mariadb_case_config
|
345
|
+
|
346
|
+
return _mariadb_case_config.get(index_type)
|
347
|
+
|
348
|
+
if self == DB.TiDB:
|
349
|
+
from .tidb.config import TiDBIndexConfig
|
350
|
+
|
351
|
+
return TiDBIndexConfig
|
352
|
+
|
305
353
|
# DB.Pinecone, DB.Chroma, DB.Redis
|
306
354
|
return EmptyDBCaseConfig
|
307
355
|
|
@@ -12,6 +12,7 @@ log = logging.getLogger(__name__)
|
|
12
12
|
|
13
13
|
WAITING_FOR_REFRESH_SEC = 30
|
14
14
|
WAITING_FOR_FORCE_MERGE_SEC = 30
|
15
|
+
SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC = 30
|
15
16
|
|
16
17
|
|
17
18
|
class AWSOpenSearch(VectorDB):
|
@@ -52,10 +53,27 @@ class AWSOpenSearch(VectorDB):
|
|
52
53
|
return AWSOpenSearchIndexConfig
|
53
54
|
|
54
55
|
def _create_index(self, client: OpenSearch):
|
56
|
+
cluster_settings_body = {
|
57
|
+
"persistent": {
|
58
|
+
"knn.algo_param.index_thread_qty": self.case_config.index_thread_qty,
|
59
|
+
"knn.memory.circuit_breaker.limit": self.case_config.cb_threshold,
|
60
|
+
}
|
61
|
+
}
|
62
|
+
client.cluster.put_settings(cluster_settings_body)
|
55
63
|
settings = {
|
56
64
|
"index": {
|
57
65
|
"knn": True,
|
66
|
+
"number_of_shards": self.case_config.number_of_shards,
|
67
|
+
"number_of_replicas": 0,
|
68
|
+
"translog.flush_threshold_size": self.case_config.flush_threshold_size,
|
69
|
+
# The translog flush threshold above comes from the case config (default "5120mb", i.e. 5GB)
|
70
|
+
**(
|
71
|
+
{"knn.algo_param.ef_search": self.case_config.ef_search}
|
72
|
+
if self.case_config.engine == AWSOS_Engine.nmslib
|
73
|
+
else {}
|
74
|
+
),
|
58
75
|
},
|
76
|
+
"refresh_interval": self.case_config.refresh_interval,
|
59
77
|
}
|
60
78
|
mappings = {
|
61
79
|
"properties": {
|
@@ -145,9 +163,9 @@ class AWSOpenSearch(VectorDB):
|
|
145
163
|
docvalue_fields=[self.id_col_name],
|
146
164
|
stored_fields="_none_",
|
147
165
|
)
|
148
|
-
log.
|
149
|
-
log.
|
150
|
-
log.
|
166
|
+
log.debug(f"Search took: {resp['took']}")
|
167
|
+
log.debug(f"Search shards: {resp['_shards']}")
|
168
|
+
log.debug(f"Search hits total: {resp['hits']['total']}")
|
151
169
|
return [int(h["fields"][self.id_col_name][0]) for h in resp["hits"]["hits"]]
|
152
170
|
except Exception as e:
|
153
171
|
log.warning(f"Failed to search: {self.index_name} error: {e!s}")
|
@@ -157,12 +175,37 @@ class AWSOpenSearch(VectorDB):
|
|
157
175
|
"""optimize will be called between insertion and search in performance cases."""
|
158
176
|
# Call refresh first to ensure that all segments are created
|
159
177
|
self._refresh_index()
|
160
|
-
self.
|
178
|
+
if self.case_config.force_merge_enabled:
|
179
|
+
self._do_force_merge()
|
180
|
+
self._refresh_index()
|
181
|
+
self._update_replicas()
|
161
182
|
# Call refresh again to ensure that the index is ready after the force merge and replica update.
|
162
183
|
self._refresh_index()
|
163
184
|
# ensure that all graphs are loaded in memory and ready for search
|
164
185
|
self._load_graphs_to_memory()
|
165
186
|
|
187
|
+
def _update_replicas(self):
    """Apply the configured replica count to the index, then wait on its health.

    The current ``number_of_replicas`` is read from the index settings only so
    it can be logged next to the target value. After the new setting is pushed,
    ``self._wait_till_green()`` is called.
    """
    fetched_settings = self.client.indices.get_settings(index=self.index_name)
    index_section = fetched_settings[self.index_name]["settings"]["index"]
    replicas_before = int(index_section["number_of_replicas"])
    log.info(
        f"Current Number of replicas are {replicas_before}"
        f" and changing the replicas to {self.case_config.number_of_replicas}"
    )
    # Push only the replica count; all other index settings stay untouched.
    self.client.indices.put_settings(
        index=self.index_name,
        body={"index": {"number_of_replicas": self.case_config.number_of_replicas}},
    )
    self._wait_till_green()
|
197
|
+
|
198
|
+
def _wait_till_green(self):
    """Poll the index health until it reports ``green``.

    Queries the cat-indices API for the ``health`` column of ``self.index_name``
    and sleeps ``SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC`` seconds between
    polls. Returns only once the index is green; there is no timeout.
    """
    log.info("Wait for index to become green..")
    while True:
        res = self.client.cat.indices(index=self.index_name, h="health", format="json")
        health = res[0]["health"]
        # Leave the loop only when the index IS green. The earlier check was
        # inverted: it returned on the first non-green reading and kept
        # sleeping forever once the index was actually green.
        if health == "green":
            break
        log.info(f"The index {self.index_name} has health : {health} and is not green. Retrying")
        time.sleep(SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC)
    log.info(f"Index {self.index_name} is green..")
|
208
|
+
|
166
209
|
def _refresh_index(self):
|
167
210
|
log.debug(f"Starting refresh for index {self.index_name}")
|
168
211
|
while True:
|
@@ -179,6 +222,12 @@ class AWSOpenSearch(VectorDB):
|
|
179
222
|
log.debug(f"Completed refresh for index {self.index_name}")
|
180
223
|
|
181
224
|
def _do_force_merge(self):
|
225
|
+
log.info(f"Updating the Index thread qty to {self.case_config.index_thread_qty_during_force_merge}.")
|
226
|
+
|
227
|
+
cluster_settings_body = {
|
228
|
+
"persistent": {"knn.algo_param.index_thread_qty": self.case_config.index_thread_qty_during_force_merge}
|
229
|
+
}
|
230
|
+
self.client.cluster.put_settings(cluster_settings_body)
|
182
231
|
log.debug(f"Starting force merge for index {self.index_name}")
|
183
232
|
force_merge_endpoint = f"/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false"
|
184
233
|
force_merge_task_id = self.client.transport.perform_request("POST", force_merge_endpoint)["task"]
|
@@ -0,0 +1,125 @@
|
|
1
|
+
from typing import Annotated, TypedDict, Unpack
|
2
|
+
|
3
|
+
import click
|
4
|
+
from pydantic import SecretStr
|
5
|
+
|
6
|
+
from ....cli.cli import (
|
7
|
+
CommonTypedDict,
|
8
|
+
HNSWFlavor2,
|
9
|
+
cli,
|
10
|
+
click_parameter_decorators_from_typed_dict,
|
11
|
+
run,
|
12
|
+
)
|
13
|
+
from .. import DB
|
14
|
+
|
15
|
+
|
16
|
+
class AWSOpenSearchTypedDict(TypedDict):
    """Command-line options specific to the AWS OpenSearch client.

    Each key pairs the Python type of the parsed value with the ``click.option``
    that declares the corresponding flag. The annotated type matches the
    ``type=`` given to click, so type checkers see what the command receives.
    """

    # Connection options.
    host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
    port: Annotated[int, click.option("--port", type=int, default=443, help="Db Port")]
    user: Annotated[str, click.option("--user", type=str, default="admin", help="Db User")]
    password: Annotated[str, click.option("--password", type=str, help="Db password")]

    # Index layout.
    number_of_shards: Annotated[
        int,
        click.option("--number-of-shards", type=int, help="Number of primary shards for the index", default=1),
    ]
    number_of_replicas: Annotated[
        int,
        click.option(
            "--number-of-replicas", type=int, help="Number of replica copies for each primary shard", default=1
        ),
    ]

    # Indexing / merge tuning.
    index_thread_qty: Annotated[
        int,
        click.option(
            "--index-thread-qty",
            type=int,
            help="Thread count for native engine indexing",
            default=4,
        ),
    ]

    index_thread_qty_during_force_merge: Annotated[
        int,
        click.option(
            "--index-thread-qty-during-force-merge",
            type=int,
            help="Thread count during force merge operations",
            default=4,
        ),
    ]

    number_of_indexing_clients: Annotated[
        int,
        click.option(
            "--number-of-indexing-clients",
            type=int,
            help="Number of concurrent indexing clients",
            default=1,
        ),
    ]

    number_of_segments: Annotated[
        int,
        click.option("--number-of-segments", type=int, help="Target number of segments after merging", default=1),
    ]

    # Declared to click as a string such as "60s", so the value type is str (was annotated int).
    refresh_interval: Annotated[
        str,
        click.option(
            "--refresh-interval", type=str, help="How often to make new data available for search", default="60s"
        ),
    ]

    # Declared to click as a bool, so the value type is bool (was annotated int).
    force_merge_enabled: Annotated[
        bool,
        click.option("--force-merge-enabled", type=bool, help="Whether to perform force merge operation", default=True),
    ]

    # Declared to click as a string such as "5120mb", so the value type is str (was annotated int).
    flush_threshold_size: Annotated[
        str,
        click.option(
            "--flush-threshold-size", type=str, help="Size threshold for flushing the transaction log", default="5120mb"
        ),
    ]

    # Declared to click as a string such as "50%", so the value type is str (was annotated int).
    cb_threshold: Annotated[
        str,
        click.option(
            "--cb-threshold",
            type=str,
            help="k-NN Memory circuit breaker threshold",
            default="50%",
        ),
    ]
|
94
|
+
|
95
|
+
|
96
|
+
# Full option set for the AWS OpenSearch command: the three parent option groups merged, no extra keys.
class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFlavor2):
    pass
|
97
|
+
|
98
|
+
|
99
|
+
@cli.command()
@click_parameter_decorators_from_typed_dict(AWSOpenSearchHNSWTypedDict)
def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
    # NOTE: deliberately no docstring here; click would show it as the command's help text.
    # Config classes are imported inside the function, as in the original.
    from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig

    # Connection settings; the password is wrapped in SecretStr before being handed on.
    connection_config = AWSOpenSearchConfig(
        host=parameters["host"],
        port=parameters["port"],
        user=parameters["user"],
        password=SecretStr(parameters["password"]),
    )

    # Options forwarded one-to-one (same name) into the index/case config.
    index_option_names = (
        "number_of_shards",
        "number_of_replicas",
        "index_thread_qty",
        "number_of_segments",
        "refresh_interval",
        "force_merge_enabled",
        "flush_threshold_size",
        "number_of_indexing_clients",
        "index_thread_qty_during_force_merge",
        "cb_threshold",
    )
    index_config = AWSOpenSearchIndexConfig(**{name: parameters[name] for name in index_option_names})

    run(
        db=DB.AWSOpenSearch,
        db_config=connection_config,
        db_case_config=index_config,
        **parameters,
    )
|
@@ -39,6 +39,16 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
|
|
39
39
|
efConstruction: int = 256
|
40
40
|
efSearch: int = 256
|
41
41
|
M: int = 16
|
42
|
+
index_thread_qty: int | None = 4
|
43
|
+
number_of_shards: int | None = 1
|
44
|
+
number_of_replicas: int | None = 0
|
45
|
+
number_of_segments: int | None = 1
|
46
|
+
refresh_interval: str | None = "60s"
|
47
|
+
force_merge_enabled: bool | None = True
|
48
|
+
flush_threshold_size: str | None = "5120mb"
|
49
|
+
number_of_indexing_clients: int | None = 1
|
50
|
+
index_thread_qty_during_force_merge: int
|
51
|
+
cb_threshold: str | None = "50%"
|
42
52
|
|
43
53
|
def parse_metric(self) -> str:
|
44
54
|
if self.metric_type == MetricType.IP:
|