vectordb-bench 0.0.17__tar.gz → 0.0.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/PKG-INFO +13 -23
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/README.md +5 -7
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/pyproject.toml +19 -17
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/test_rate_runner.py +3 -3
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/cases.py +1 -1
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/__init__.py +39 -0
- vectordb_bench-0.0.19/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +27 -0
- vectordb_bench-0.0.19/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +19 -0
- vectordb_bench-0.0.19/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +304 -0
- vectordb_bench-0.0.19/vectordb_bench/backend/clients/aliyun_opensearch/config.py +48 -0
- vectordb_bench-0.0.19/vectordb_bench/backend/clients/alloydb/alloydb.py +372 -0
- vectordb_bench-0.0.19/vectordb_bench/backend/clients/alloydb/cli.py +147 -0
- vectordb_bench-0.0.19/vectordb_bench/backend/clients/alloydb/config.py +168 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/api.py +5 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/milvus/cli.py +25 -1
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/milvus/config.py +16 -2
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/milvus/milvus.py +4 -6
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/runner/rate_runner.py +32 -15
- vectordb_bench-0.0.19/vectordb_bench/backend/runner/read_write_runner.py +178 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/runner/serial_runner.py +8 -2
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/runner/util.py +0 -16
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/task_runner.py +4 -3
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/utils.py +1 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/cli/vectordbbench.py +2 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/config/dbCaseConfigs.py +224 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/models.py +9 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/PKG-INFO +13 -23
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/SOURCES.txt +7 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/requires.txt +7 -18
- vectordb_bench-0.0.17/vectordb_bench/backend/runner/read_write_runner.py +0 -112
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/.devcontainer/Dockerfile +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/.devcontainer/devcontainer.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/.env.example +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/.github/workflows/publish_package_on_release.yml +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/.github/workflows/pull_request.yml +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/.gitignore +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/.ruff.toml +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/Dockerfile +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/LICENSE +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/Makefile +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/OWNERS +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/fig/custom_case_run_test.png +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/fig/custom_dataset.png +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/install/requirements_py3.11.txt +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/install.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/setup.cfg +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/conftest.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/pytest.ini +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/test_bench_runner.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/test_chroma.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/test_data_source.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/test_dataset.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/test_elasticsearch_cloud.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/test_models.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/test_redis.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/test_utils.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/tests/ut_cases.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/__init__.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/__main__.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/__init__.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/assembler.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/aws_opensearch/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/aws_opensearch/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/chroma/chroma.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/chroma/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/memorydb/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/memorydb/memorydb.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgdiskann/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgdiskann/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvector/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvector/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvectorscale/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/redis/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/redis/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/redis/redis.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/test/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/test/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/test/test.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/data_source.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/dataset.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/result_collector.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/runner/__init__.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/backend/runner/mp_runner.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/base.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/cli/__init__.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/cli/cli.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/config-files/sample_config.yml +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/custom/custom_case.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/data.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/concurrent/charts.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/custom/displaypPrams.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/custom/getCustomConfig.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/run_test/submitTask.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/components/tables/data.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/config/dbPrices.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/config/styles.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/pages/concurrent.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/pages/custom.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/pages/quries_per_dollar.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/pages/run_test.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/pages/tables.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/utils.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/frontend/vdb_benchmark.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/interface.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/log_util.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/metric.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/dbPrices.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/getLeaderboardData.py +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench/results/leaderboard.json +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/dependency_links.txt +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/entry_points.txt +0 -0
- {vectordb_bench-0.0.17 → vectordb_bench-0.0.19}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.17
|
3
|
+
Version: 0.0.19
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -35,15 +35,16 @@ Requires-Dist: qdrant-client; extra == "all"
|
|
35
35
|
Requires-Dist: pinecone-client; extra == "all"
|
36
36
|
Requires-Dist: weaviate-client; extra == "all"
|
37
37
|
Requires-Dist: elasticsearch; extra == "all"
|
38
|
-
Requires-Dist: pgvector; extra == "all"
|
39
|
-
Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "all"
|
40
38
|
Requires-Dist: sqlalchemy; extra == "all"
|
41
39
|
Requires-Dist: redis; extra == "all"
|
42
40
|
Requires-Dist: chromadb; extra == "all"
|
41
|
+
Requires-Dist: pgvector; extra == "all"
|
43
42
|
Requires-Dist: psycopg; extra == "all"
|
44
43
|
Requires-Dist: psycopg-binary; extra == "all"
|
45
|
-
Requires-Dist:
|
46
|
-
Requires-Dist: opensearch-
|
44
|
+
Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "all"
|
45
|
+
Requires-Dist: opensearch-dsl; extra == "all"
|
46
|
+
Requires-Dist: opensearch-py; extra == "all"
|
47
|
+
Requires-Dist: memorydb; extra == "all"
|
47
48
|
Provides-Extra: qdrant
|
48
49
|
Requires-Dist: qdrant-client; extra == "qdrant"
|
49
50
|
Provides-Extra: pinecone
|
@@ -56,14 +57,6 @@ Provides-Extra: pgvector
|
|
56
57
|
Requires-Dist: psycopg; extra == "pgvector"
|
57
58
|
Requires-Dist: psycopg-binary; extra == "pgvector"
|
58
59
|
Requires-Dist: pgvector; extra == "pgvector"
|
59
|
-
Provides-Extra: pgvectorscale
|
60
|
-
Requires-Dist: psycopg; extra == "pgvectorscale"
|
61
|
-
Requires-Dist: psycopg-binary; extra == "pgvectorscale"
|
62
|
-
Requires-Dist: pgvector; extra == "pgvectorscale"
|
63
|
-
Provides-Extra: pgdiskann
|
64
|
-
Requires-Dist: psycopg; extra == "pgdiskann"
|
65
|
-
Requires-Dist: psycopg-binary; extra == "pgdiskann"
|
66
|
-
Requires-Dist: pgvector; extra == "pgdiskann"
|
67
60
|
Provides-Extra: pgvecto-rs
|
68
61
|
Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "pgvecto-rs"
|
69
62
|
Provides-Extra: redis
|
@@ -72,9 +65,8 @@ Provides-Extra: memorydb
|
|
72
65
|
Requires-Dist: memorydb; extra == "memorydb"
|
73
66
|
Provides-Extra: chromadb
|
74
67
|
Requires-Dist: chromadb; extra == "chromadb"
|
75
|
-
Provides-Extra:
|
76
|
-
Requires-Dist:
|
77
|
-
Provides-Extra: zilliz-cloud
|
68
|
+
Provides-Extra: opensearch
|
69
|
+
Requires-Dist: opensearch-py; extra == "opensearch"
|
78
70
|
|
79
71
|
# VectorDBBench: A Benchmark Tool for VectorDB
|
80
72
|
|
@@ -107,20 +99,18 @@ All the database client supported
|
|
107
99
|
|
108
100
|
| Optional database client | install command |
|
109
101
|
|--------------------------|---------------------------------------------|
|
110
|
-
| pymilvus(*default*)
|
111
|
-
| all
|
102
|
+
| pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
|
103
|
+
| all (*clients requirements might be conflict with each other*) | `pip install vectordb-bench[all]` |
|
112
104
|
| qdrant | `pip install vectordb-bench[qdrant]` |
|
113
105
|
| pinecone | `pip install vectordb-bench[pinecone]` |
|
114
106
|
| weaviate | `pip install vectordb-bench[weaviate]` |
|
115
|
-
| elastic
|
116
|
-
| pgvector
|
107
|
+
| elastic, aliyun_elasticsearch| `pip install vectordb-bench[elastic]` |
|
108
|
+
| pgvector, pgvectorscale, pgdiskann, alloydb | `pip install vectordb-bench[pgvector]` |
|
117
109
|
| pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
|
118
|
-
| pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
|
119
|
-
| pgdiskann | `pip install vectordb-bench[pgdiskann]` |
|
120
110
|
| redis | `pip install vectordb-bench[redis]` |
|
121
111
|
| memorydb | `pip install vectordb-bench[memorydb]` |
|
122
112
|
| chromadb | `pip install vectordb-bench[chromadb]` |
|
123
|
-
| awsopensearch | `pip install vectordb-bench[
|
113
|
+
| awsopensearch | `pip install vectordb-bench[opensearch]` |
|
124
114
|
|
125
115
|
### Run
|
126
116
|
|
@@ -29,20 +29,18 @@ All the database client supported
|
|
29
29
|
|
30
30
|
| Optional database client | install command |
|
31
31
|
|--------------------------|---------------------------------------------|
|
32
|
-
| pymilvus(*default*)
|
33
|
-
| all
|
32
|
+
| pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
|
33
|
+
| all (*clients requirements might be conflict with each other*) | `pip install vectordb-bench[all]` |
|
34
34
|
| qdrant | `pip install vectordb-bench[qdrant]` |
|
35
35
|
| pinecone | `pip install vectordb-bench[pinecone]` |
|
36
36
|
| weaviate | `pip install vectordb-bench[weaviate]` |
|
37
|
-
| elastic
|
38
|
-
| pgvector
|
37
|
+
| elastic, aliyun_elasticsearch| `pip install vectordb-bench[elastic]` |
|
38
|
+
| pgvector, pgvectorscale, pgdiskann, alloydb | `pip install vectordb-bench[pgvector]` |
|
39
39
|
| pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
|
40
|
-
| pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
|
41
|
-
| pgdiskann | `pip install vectordb-bench[pgdiskann]` |
|
42
40
|
| redis | `pip install vectordb-bench[redis]` |
|
43
41
|
| memorydb | `pip install vectordb-bench[memorydb]` |
|
44
42
|
| chromadb | `pip install vectordb-bench[chromadb]` |
|
45
|
-
| awsopensearch | `pip install vectordb-bench[
|
43
|
+
| awsopensearch | `pip install vectordb-bench[opensearch]` |
|
46
44
|
|
47
45
|
### Run
|
48
46
|
|
@@ -55,30 +55,32 @@ all = [
|
|
55
55
|
"pinecone-client",
|
56
56
|
"weaviate-client",
|
57
57
|
"elasticsearch",
|
58
|
-
"pgvector",
|
59
|
-
"pgvecto_rs[psycopg3]>=0.2.2",
|
60
58
|
"sqlalchemy",
|
61
59
|
"redis",
|
62
60
|
"chromadb",
|
61
|
+
"pgvector",
|
63
62
|
"psycopg",
|
64
63
|
"psycopg-binary",
|
65
|
-
"
|
66
|
-
"opensearch-
|
64
|
+
"pgvecto_rs[psycopg3]>=0.2.2",
|
65
|
+
"opensearch-dsl",
|
66
|
+
"opensearch-py",
|
67
|
+
"memorydb",
|
67
68
|
]
|
68
69
|
|
69
|
-
qdrant
|
70
|
-
pinecone
|
71
|
-
weaviate
|
72
|
-
elastic
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
70
|
+
qdrant = [ "qdrant-client" ]
|
71
|
+
pinecone = [ "pinecone-client" ]
|
72
|
+
weaviate = [ "weaviate-client" ]
|
73
|
+
elastic = [ "elasticsearch" ]
|
74
|
+
# For elastic and aliyun_elasticsearch
|
75
|
+
|
76
|
+
pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
|
77
|
+
# for pgvector, pgvectorscale, pgdiskann, and, alloydb
|
78
|
+
|
79
|
+
pgvecto_rs = [ "pgvecto_rs[psycopg3]>=0.2.2" ]
|
80
|
+
redis = [ "redis" ]
|
81
|
+
memorydb = [ "memorydb" ]
|
82
|
+
chromadb = [ "chromadb" ]
|
83
|
+
opensearch = [ "opensearch-py" ]
|
82
84
|
|
83
85
|
[project.urls]
|
84
86
|
"repository" = "https://github.com/zilliztech/VectorDBBench"
|
@@ -52,9 +52,9 @@ def test_read_write_runner(db, insert_rate, conc: list, search_stage: Iterable[f
|
|
52
52
|
|
53
53
|
def get_db(db: str, config: dict) -> VectorDB:
|
54
54
|
if db == DB.Milvus.name:
|
55
|
-
return DB.Milvus.init_cls(dim=768, db_config=config, db_case_config=FLATConfig(metric_type="COSINE"), drop_old=True
|
55
|
+
return DB.Milvus.init_cls(dim=768, db_config=config, db_case_config=FLATConfig(metric_type="COSINE"), drop_old=True)
|
56
56
|
elif db == DB.ZillizCloud.name:
|
57
|
-
return DB.ZillizCloud.init_cls(dim=768, db_config=config, db_case_config=AutoIndexConfig(metric_type="COSINE"), drop_old=True
|
57
|
+
return DB.ZillizCloud.init_cls(dim=768, db_config=config, db_case_config=AutoIndexConfig(metric_type="COSINE"), drop_old=True)
|
58
58
|
else:
|
59
59
|
raise ValueError(f"unknown db: {db}")
|
60
60
|
|
@@ -76,7 +76,7 @@ if __name__ == "__main__":
|
|
76
76
|
}
|
77
77
|
|
78
78
|
conc = (1, 15, 50)
|
79
|
-
search_stage = (0.5, 0.6, 0.7, 0.8, 0.9
|
79
|
+
search_stage = (0.5, 0.6, 0.7, 0.8, 0.9)
|
80
80
|
|
81
81
|
db = get_db(flags.db, config)
|
82
82
|
test_read_write_runner(
|
@@ -289,7 +289,7 @@ class Performance1536D50K(PerformanceCase):
|
|
289
289
|
description: str = """This case tests the search performance of a vector database with a medium 50K dataset (<b>OpenAI 50K vectors</b>, 1536 dimensions), at varying parallel levels.
|
290
290
|
Results will show index building time, recall, and maximum QPS."""
|
291
291
|
load_timeout: float | int = 3600
|
292
|
-
optimize_timeout: float | int | None =
|
292
|
+
optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_DEFAULT
|
293
293
|
|
294
294
|
|
295
295
|
def metric_type_map(s: str) -> MetricType:
|
@@ -32,11 +32,14 @@ class DB(Enum):
|
|
32
32
|
PgVectoRS = "PgVectoRS"
|
33
33
|
PgVectorScale = "PgVectorScale"
|
34
34
|
PgDiskANN = "PgDiskANN"
|
35
|
+
AlloyDB = "AlloyDB"
|
35
36
|
Redis = "Redis"
|
36
37
|
MemoryDB = "MemoryDB"
|
37
38
|
Chroma = "Chroma"
|
38
39
|
AWSOpenSearch = "OpenSearch"
|
40
|
+
AliyunElasticsearch = "AliyunElasticsearch"
|
39
41
|
Test = "test"
|
42
|
+
AliyunOpenSearch = "AliyunOpenSearch"
|
40
43
|
|
41
44
|
|
42
45
|
@property
|
@@ -97,6 +100,18 @@ class DB(Enum):
|
|
97
100
|
if self == DB.AWSOpenSearch:
|
98
101
|
from .aws_opensearch.aws_opensearch import AWSOpenSearch
|
99
102
|
return AWSOpenSearch
|
103
|
+
|
104
|
+
if self == DB.AlloyDB:
|
105
|
+
from .alloydb.alloydb import AlloyDB
|
106
|
+
return AlloyDB
|
107
|
+
|
108
|
+
if self == DB.AliyunElasticsearch:
|
109
|
+
from .aliyun_elasticsearch.aliyun_elasticsearch import AliyunElasticsearch
|
110
|
+
return AliyunElasticsearch
|
111
|
+
|
112
|
+
if self == DB.AliyunOpenSearch:
|
113
|
+
from .aliyun_opensearch.aliyun_opensearch import AliyunOpenSearch
|
114
|
+
return AliyunOpenSearch
|
100
115
|
|
101
116
|
@property
|
102
117
|
def config_cls(self) -> Type[DBConfig]:
|
@@ -156,6 +171,18 @@ class DB(Enum):
|
|
156
171
|
if self == DB.AWSOpenSearch:
|
157
172
|
from .aws_opensearch.config import AWSOpenSearchConfig
|
158
173
|
return AWSOpenSearchConfig
|
174
|
+
|
175
|
+
if self == DB.AlloyDB:
|
176
|
+
from .alloydb.config import AlloyDBConfig
|
177
|
+
return AlloyDBConfig
|
178
|
+
|
179
|
+
if self == DB.AliyunElasticsearch:
|
180
|
+
from .aliyun_elasticsearch.config import AliyunElasticsearchConfig
|
181
|
+
return AliyunElasticsearchConfig
|
182
|
+
|
183
|
+
if self == DB.AliyunOpenSearch:
|
184
|
+
from .aliyun_opensearch.config import AliyunOpenSearchConfig
|
185
|
+
return AliyunOpenSearchConfig
|
159
186
|
|
160
187
|
def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseConfig]:
|
161
188
|
if self == DB.Milvus:
|
@@ -197,6 +224,18 @@ class DB(Enum):
|
|
197
224
|
if self == DB.PgDiskANN:
|
198
225
|
from .pgdiskann.config import _pgdiskann_case_config
|
199
226
|
return _pgdiskann_case_config.get(index_type)
|
227
|
+
|
228
|
+
if self == DB.AlloyDB:
|
229
|
+
from .alloydb.config import _alloydb_case_config
|
230
|
+
return _alloydb_case_config.get(index_type)
|
231
|
+
|
232
|
+
if self == DB.AliyunElasticsearch:
|
233
|
+
from .elastic_cloud.config import ElasticCloudIndexConfig
|
234
|
+
return ElasticCloudIndexConfig
|
235
|
+
|
236
|
+
if self == DB.AliyunOpenSearch:
|
237
|
+
from .aliyun_opensearch.config import AliyunOpenSearchIndexConfig
|
238
|
+
return AliyunOpenSearchIndexConfig
|
200
239
|
|
201
240
|
# DB.Pinecone, DB.Chroma, DB.Redis
|
202
241
|
return EmptyDBCaseConfig
|
vectordb_bench-0.0.19/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
from ..elastic_cloud.elastic_cloud import ElasticCloud
|
2
|
+
from ..elastic_cloud.config import ElasticCloudIndexConfig
|
3
|
+
|
4
|
+
|
5
|
+
class AliyunElasticsearch(ElasticCloud):
|
6
|
+
def __init__(
|
7
|
+
self,
|
8
|
+
dim: int,
|
9
|
+
db_config: dict,
|
10
|
+
db_case_config: ElasticCloudIndexConfig,
|
11
|
+
indice: str = "vdb_bench_indice", # must be lowercase
|
12
|
+
id_col_name: str = "id",
|
13
|
+
vector_col_name: str = "vector",
|
14
|
+
drop_old: bool = False,
|
15
|
+
**kwargs,
|
16
|
+
):
|
17
|
+
super().__init__(
|
18
|
+
dim=dim,
|
19
|
+
db_config=db_config,
|
20
|
+
db_case_config=db_case_config,
|
21
|
+
indice=indice,
|
22
|
+
id_col_name=id_col_name,
|
23
|
+
vector_col_name=vector_col_name,
|
24
|
+
drop_old=drop_old,
|
25
|
+
**kwargs,
|
26
|
+
)
|
27
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
from pydantic import SecretStr, BaseModel
|
3
|
+
|
4
|
+
from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
|
5
|
+
|
6
|
+
|
7
|
+
class AliyunElasticsearchConfig(DBConfig, BaseModel):
|
8
|
+
#: Protocol in use to connect to the node
|
9
|
+
scheme: str = "http"
|
10
|
+
host: str = ""
|
11
|
+
port: int = 9200
|
12
|
+
user: str = "elastic"
|
13
|
+
password: SecretStr
|
14
|
+
|
15
|
+
def to_dict(self) -> dict:
|
16
|
+
return {
|
17
|
+
"hosts": [{'scheme': self.scheme, 'host': self.host, 'port': self.port}],
|
18
|
+
"basic_auth": (self.user, self.password.get_secret_value()),
|
19
|
+
}
|
@@ -0,0 +1,304 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
from contextlib import contextmanager
|
4
|
+
import time
|
5
|
+
|
6
|
+
from alibabacloud_ha3engine_vector.models import QueryRequest
|
7
|
+
|
8
|
+
from ..api import VectorDB, MetricType
|
9
|
+
from .config import AliyunOpenSearchIndexConfig
|
10
|
+
|
11
|
+
from alibabacloud_searchengine20211025.client import Client as searchengineClient
|
12
|
+
from alibabacloud_searchengine20211025 import models as searchengine_models
|
13
|
+
from alibabacloud_tea_openapi import models as open_api_models
|
14
|
+
from alibabacloud_ha3engine_vector import models, client
|
15
|
+
|
16
|
+
log = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024 # 2MB
|
19
|
+
ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH = 100
|
20
|
+
|
21
|
+
class AliyunOpenSearch(VectorDB):
|
22
|
+
def __init__(
|
23
|
+
self,
|
24
|
+
dim: int,
|
25
|
+
db_config: dict,
|
26
|
+
db_case_config: AliyunOpenSearchIndexConfig,
|
27
|
+
collection_name: str = "VectorDBBenchCollection",
|
28
|
+
drop_old: bool = False,
|
29
|
+
**kwargs,
|
30
|
+
):
|
31
|
+
self.control_client = None
|
32
|
+
self.dim = dim
|
33
|
+
self.db_config = db_config
|
34
|
+
self.case_config = db_case_config
|
35
|
+
self.collection_name = collection_name
|
36
|
+
self.instance_id = db_config["host"].split(".")[0].replace("http://", "").replace("https://", "")
|
37
|
+
|
38
|
+
self._primary_field = "id"
|
39
|
+
self._scalar_field = "int_id"
|
40
|
+
self._vector_field = "vector"
|
41
|
+
self._index_name = "vector_idx"
|
42
|
+
|
43
|
+
self.batch_size = int(
|
44
|
+
min(ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH / (dim * 25), ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH)
|
45
|
+
)
|
46
|
+
|
47
|
+
log.info(f"Aliyun_OpenSearch client config: {self.db_config}")
|
48
|
+
control_config = open_api_models.Config(
|
49
|
+
access_key_id=self.db_config["ak"],
|
50
|
+
access_key_secret=self.db_config["sk"],
|
51
|
+
endpoint=self.db_config["control_host"]
|
52
|
+
)
|
53
|
+
self.control_client = searchengineClient(control_config)
|
54
|
+
|
55
|
+
if drop_old:
|
56
|
+
log.info(f"aliyun_OpenSearch client drop old index: {self.collection_name}")
|
57
|
+
if self._index_exists(self.control_client):
|
58
|
+
self._modify_index(self.control_client)
|
59
|
+
else:
|
60
|
+
self._create_index(self.control_client)
|
61
|
+
|
62
|
+
def _create_index(self, client: searchengineClient):
    """Create the table with an HNSW vector index and wait until it is usable.

    Args:
        client: Control-plane client used for ``create_table`` and, through
            ``_active_index``, ``get_table``.

    Raises:
        Exception: Whatever ``client.create_table`` raised, re-raised after
            the failure details have been logged.
    """
    create_table_request = searchengine_models.CreateTableRequest()
    create_table_request.name = self.collection_name
    create_table_request.primary_key = self._primary_field
    create_table_request.partition_count = 1
    create_table_request.field_schema = {
        self._primary_field: "INT64",
        self._vector_field: "MULTI_FLOAT",
        self._scalar_field: "INT64",
    }
    # NOTE(review): the Modify* model classes are reused for the create
    # request - confirm whether the SDK expects Create* counterparts.
    vector_index = searchengine_models.ModifyTableRequestVectorIndex()
    vector_index.index_name = self._index_name
    vector_index.dimension = self.dim
    vector_index.distance_type = self.case_config.distance_type()
    vector_index.vector_field = self._vector_field
    vector_index.vector_index_type = "HNSW"

    # Serialize with compact separators so the payload matches the
    # hand-written JSON strings byte for byte.
    build_index_params = {
        "proxima.hnsw.builder.max_neighbor_count": self.case_config.M,
        "proxima.hnsw.builder.efconstruction": self.case_config.efConstruction,
        "proxima.hnsw.builder.enable_adsampling": True,
        "proxima.hnsw.builder.slack_pruning_factor": 1.1,
        "proxima.hnsw.builder.thread_count": 16,
    }
    search_index_params = {
        "proxima.hnsw.searcher.ef": 400,
        "proxima.hnsw.searcher.dynamic_termination.prob_threshold": 0.7,
    }
    advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
    advance_params.build_index_params = json.dumps(build_index_params, separators=(",", ":"))
    advance_params.search_index_params = json.dumps(search_index_params, separators=(",", ":"))
    vector_index.advance_params = advance_params
    create_table_request.vector_index = [vector_index]

    try:
        response = client.create_table(self.instance_id, create_table_request)
        log.info(f"create table success: {response.body}")
    except Exception as error:
        # Not every exception carries ``message``/``data``; reading them
        # unguarded would replace the real failure with an AttributeError.
        log.info(getattr(error, "message", None))
        error_data = getattr(error, "data", None)
        log.info(error_data.get("Recommend") if isinstance(error_data, dict) else None)
        log.info(f"Failed to create index: error: {str(error)}")
        raise error from None

    # check if index create success
    self._active_index(client)
|
96
|
+
|
97
|
+
# Poll until the created table reports that it is in use.
|
98
|
+
def _active_index(self, client: searchengineClient) -> None:
    """Block until ``get_table`` reports the status ``IN_USE`` for the table.

    The table is polled every 10 seconds (the first poll also happens after
    a 10 second sleep); there is no upper bound on the number of polls.

    Args:
        client: Control-plane client used for ``get_table``.
    """
    attempt = 0
    table_status = None
    while table_status != 'IN_USE':
        time.sleep(10)
        log.info(f"begin to {attempt} times get table")
        attempt += 1
        table_info = client.get_table(self.instance_id, self.collection_name)
        table_status = table_info.body.result.status
    log.info(f"{self.collection_name} table begin to use.")
|
108
|
+
|
109
|
+
def _index_exists(self, client: searchengineClient) -> bool:
    """Return whether ``get_table`` succeeds for this table.

    Any exception raised by the lookup is logged and reported as "table
    does not exist".

    Args:
        client: Control-plane client used for ``get_table``.

    Returns:
        True when the lookup succeeds, False when it raises.
    """
    try:
        client.get_table(self.instance_id, self.collection_name)
    except Exception as error:
        log.info("get table from searchengine error")
        # Fall back to str(error) for exceptions without a ``message``
        # attribute so the handler itself can never raise.
        log.info(getattr(error, "message", str(error)))
        return False
    return True
|
117
|
+
|
118
|
+
# Check whether the index build succeeded; embeddings are inserted into the vector database only after the build succeeds.
|
119
|
+
def _index_build_success(self, client: searchengineClient) -> None:
    """Wait until the table's ``datasource_flow_fsm`` task reports success.

    After an initial 50 second delay, the task list of the last hour is
    polled every 10 seconds. The method returns as soon as the first matching
    task has the status ``success``, or when no matching task is listed at
    all. There is no upper bound on the number of polls.

    Args:
        client: Control-plane client used for ``list_tasks``.
    """
    log.info("begin to check if table build success.")
    time.sleep(50)

    retry_times = 0
    while True:
        time.sleep(10)
        log.info(f"begin to {retry_times} times get table fsm")
        retry_times += 1

        # Query window: the last hour, expressed in milliseconds. Both bounds
        # derive from one clock reading so the window is exactly 3600 s wide.
        now = int(time.time())
        request = searchengine_models.ListTasksRequest()
        request.start = (now - 3600) * 1000
        request.end = now * 1000
        response = client.list_tasks(self.instance_id, request)

        # First task of the expected type whose id mentions this table.
        cur_fsm = next(
            (
                fsm
                for fsm in response.body.result
                if fsm["type"] == "datasource_flow_fsm" and self.collection_name in fsm["fsmId"]
            ),
            None,
        )
        if cur_fsm is None:
            # Reported through the module logger like every other message.
            log.info("no build index fsm")
            return
        if cur_fsm["status"] == "success":
            return
|
146
|
+
|
147
|
+
def _modify_index(self, client: searchengineClient) -> None:
    """Re-submit the table definition for an existing table and wait for it.

    Waits until the table is ``IN_USE``, sends a ``modify_table`` request
    carrying the HNSW index definition of the current case, then waits via
    ``_index_build_success``.

    Args:
        client: Control-plane client used for all requests.

    Raises:
        Exception: Whatever ``client.modify_table`` raised, re-raised after
            the failure details have been logged.
    """
    # check if index create success
    self._active_index(client)

    modify_table_request = searchengine_models.ModifyTableRequest()
    modify_table_request.partition_count = 1
    modify_table_request.primary_key = self._primary_field
    modify_table_request.field_schema = {
        self._primary_field: "INT64",
        self._vector_field: "MULTI_FLOAT",
        self._scalar_field: "INT64",
    }
    vector_index = searchengine_models.ModifyTableRequestVectorIndex()
    vector_index.index_name = self._index_name
    vector_index.dimension = self.dim
    vector_index.distance_type = self.case_config.distance_type()
    vector_index.vector_field = self._vector_field
    vector_index.vector_index_type = "HNSW"

    # Serialize with compact separators so the payload matches the
    # hand-written JSON strings byte for byte.
    build_index_params = {
        "proxima.hnsw.builder.max_neighbor_count": self.case_config.M,
        "proxima.hnsw.builder.efconstruction": self.case_config.efConstruction,
        "proxima.hnsw.builder.enable_adsampling": True,
        "proxima.hnsw.builder.slack_pruning_factor": 1.1,
        "proxima.hnsw.builder.thread_count": 16,
    }
    search_index_params = {
        "proxima.hnsw.searcher.ef": 400,
        "proxima.hnsw.searcher.dynamic_termination.prob_threshold": 0.7,
    }
    advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
    advance_params.build_index_params = json.dumps(build_index_params, separators=(",", ":"))
    advance_params.search_index_params = json.dumps(search_index_params, separators=(",", ":"))
    vector_index.advance_params = advance_params

    modify_table_request.vector_index = [vector_index]

    try:
        response = client.modify_table(self.instance_id, self.collection_name, modify_table_request)
        log.info(f"modify table success: {response.body}")
    except Exception as error:
        # Not every exception carries ``message``/``data``; reading them
        # unguarded would replace the real failure with an AttributeError.
        log.info(getattr(error, "message", None))
        error_data = getattr(error, "data", None)
        log.info(error_data.get("Recommend") if isinstance(error_data, dict) else None)
        log.info(f"Failed to modify index: error: {str(error)}")
        raise error from None

    # check if modify index & delete data fsm success
    self._index_build_success(client)
|
183
|
+
|
184
|
+
# get collection records total count
|
185
|
+
def _get_total_count(self):
    """Return the table's ``totalDocCount`` as reported by ``client.stats``.

    Returns:
        The value of ``result.totalDocCount`` from the stats response, or 0
        when that entry is missing or when the request or its decoding fails.
    """
    try:
        response = self.client.stats(self.collection_name)
        body = json.loads(response.body)
        log.info(f"stats info: {response.body}")

        result = body.get("result") if "result" in body else None
        if result is not None and "totalDocCount" in result:
            return result.get("totalDocCount")
        return 0
    except Exception as e:
        # Best effort: callers poll this value, so a failure is reported
        # through the module logger and counted as "no documents yet".
        log.warning(f"Error querying index: {e}")
        return 0
|
198
|
+
|
199
|
+
@contextmanager
def init(self) -> None:
    """Connect to Aliyun OpenSearch for the duration of a ``with`` block.

    A data-plane client is built from the ``host``, ``user`` and ``password``
    entries of ``db_config`` and stored in ``self.client``. The attribute is
    removed again when the block exits, including when the block raises.
    """
    config = models.Config(
        endpoint=self.db_config["host"],
        protocol="http",
        access_user_name=self.db_config["user"],
        access_pass_word=self.db_config["password"],
    )

    self.client = client.Client(config)

    try:
        yield
    finally:
        # Release the client even if the with-body raised; without the
        # finally clause an exception would skip this cleanup.
        self.client = None
        del self.client
|
215
|
+
|
216
|
+
def insert_embeddings(
    self,
    embeddings: list[list[float]],
    metadata: list[int],
    **kwargs,
) -> tuple[int, Exception | None]:
    """Push the embeddings to the table in batches of ``self.batch_size``.

    Args:
        embeddings: Vectors to insert.
        metadata: One integer per vector; it is written to both the primary
            key field and the scalar field of the document.
        **kwargs: Accepted for interface compatibility and ignored.

    Returns:
        ``(inserted, None)`` when every batch was pushed, otherwise
        ``(inserted_so_far, error)`` with the exception that stopped the run.
    """
    assert self.client is not None, "should self.init() first"
    assert len(embeddings) == len(metadata)
    insert_count = 0

    try:
        for batch_start in range(0, len(embeddings), self.batch_size):
            batch_end = batch_start + self.batch_size
            documents = [
                {
                    "fields": {
                        self._primary_field: doc_id,
                        self._vector_field: vector,
                        self._scalar_field: doc_id,
                        "ops_build_channel": "inc",
                    },
                    "cmd": "add",
                }
                for doc_id, vector in zip(
                    metadata[batch_start:batch_end], embeddings[batch_start:batch_end]
                )
            ]

            push_request = models.PushDocumentsRequest({}, documents)
            self.client.push_documents(self.collection_name, self._primary_field, push_request)
            # Count a batch only after its push returned without raising.
            insert_count += len(documents)
    except Exception as e:
        log.info(f"Failed to insert data: {e}")
        return (insert_count, e)
    return (insert_count, None)
|
253
|
+
|
254
|
+
def search_embedding(
    self,
    query: list[float],
    k: int = 100,
    filters: dict | None = None,
) -> list[int]:
    """Run one vector query and return the ``id`` of every hit.

    Args:
        query: Query vector.
        k: Number of results requested (sent as ``top_k``).
        filters: Optional filter; its ``metadata`` entry is appended to the
            scalar field name to form the filter expression.

    Returns:
        The ``id`` values of the entries in the response's ``result`` list.

    Raises:
        Exception: Whatever the query call raised, re-raised after logging.
    """
    assert self.client is not None, "should self.init() first"
    search_params = f'{{"proxima.hnsw.searcher.ef":{self.case_config.ef_search!s}}}'

    if filters:
        os_filter = f"{self._scalar_field} {filters.get('metadata')}"
    else:
        os_filter = ""

    try:
        result = self.client.query(
            QueryRequest(
                table_name=self.collection_name,
                vector=query,
                top_k=k,
                search_params=search_params,
                filter=os_filter,
            )
        )
    except Exception as err:
        log.info(f"Error querying index: {err}")
        raise err

    hits = json.loads(result.body)["result"]
    return [hit["id"] for hit in hits]
|
277
|
+
|
278
|
+
def need_normalize_cosine(self) -> bool:
    """Tell whether the dataset has to be normalized to support COSINE.

    Returns:
        True (after logging a notice) when the case uses the COSINE metric,
        False for every other metric.
    """
    if self.case_config.metric_type != MetricType.COSINE:
        return False

    log.info("cosine dataset need normalize.")
    return True
|
285
|
+
|
286
|
+
def optimize(self):
    """Do nothing; this client performs no work in this hook."""
    return None
|
288
|
+
|
289
|
+
def optimize_with_size(self, data_size: int):
    """Block until the table's document count equals ``data_size``.

    The count is read through ``_get_total_count`` every 10 seconds (the
    first read also happens after a 10 second sleep); there is no upper
    bound on the number of reads.

    Args:
        data_size: Number of documents the table is expected to hold.
    """
    log.info(f"optimize count: {data_size}")
    attempt = 0
    fully_loaded = False
    while not fully_loaded:
        time.sleep(10)
        log.info(f"begin to {attempt} times get optimize table")
        attempt += 1
        # check if the data is inserted
        fully_loaded = self._get_total_count() == data_size
    log.info("optimize table finish.")
|
301
|
+
|
302
|
+
def ready_to_load(self):
    """Hook called before loading in load cases; nothing to do here."""
    return None
|
@@ -0,0 +1,48 @@
|
|
1
|
+
import logging
|
2
|
+
from enum import Enum
|
3
|
+
from pydantic import SecretStr, BaseModel
|
4
|
+
|
5
|
+
from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
|
6
|
+
|
7
|
+
# Module-level logger for the Aliyun OpenSearch configuration module.
log = logging.getLogger(__name__)
|
8
|
+
|
9
|
+
|
10
|
+
class AliyunOpenSearchConfig(DBConfig, BaseModel):
    """Connection settings for the Aliyun OpenSearch client."""

    # Data-plane endpoint and its credentials.
    host: str = ""
    user: str = ""
    # The default must be a real SecretStr: a plain "" default would make
    # to_dict() fail on get_secret_value().
    password: SecretStr = SecretStr("")

    # Control-plane access key pair and endpoint.
    ak: str = ""
    sk: SecretStr = SecretStr("")
    control_host: str = "searchengine.cn-hangzhou.aliyuncs.com"

    def to_dict(self) -> dict:
        """Return the settings as a plain dict with the secrets unwrapped."""
        return {
            "host": self.host,
            "user": self.user,
            "password": self.password.get_secret_value(),
            "ak": self.ak,
            "sk": self.sk.get_secret_value(),
            "control_host": self.control_host,
        }
|
28
|
+
|
29
|
+
class AliyunOpenSearchIndexConfig(BaseModel, DBCaseConfig):
    """Index and search parameters of one Aliyun OpenSearch benchmark case."""

    metric_type: MetricType = MetricType.L2
    efConstruction: int = 500
    M: int = 100
    ef_search: int = 40

    def distance_type(self) -> str:
        """Map the metric type to the distance name sent with the index.

        IP and COSINE both map to ``InnerProduct``; L2 and any other value
        map to ``SquaredEuclidean``.
        """
        uses_inner_product = self.metric_type in (MetricType.IP, MetricType.COSINE)
        return "InnerProduct" if uses_inner_product else "SquaredEuclidean"

    def index_param(self) -> dict:
        """Return an empty dict; no index parameters are exposed this way."""
        return dict()

    def search_param(self) -> dict:
        """Return an empty dict; no search parameters are exposed this way."""
        return dict()
|