vectordb-bench 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. vectordb_bench/__init__.py +49 -24
  2. vectordb_bench/__main__.py +4 -3
  3. vectordb_bench/backend/assembler.py +12 -13
  4. vectordb_bench/backend/cases.py +55 -45
  5. vectordb_bench/backend/clients/__init__.py +75 -14
  6. vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
  7. vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
  8. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +111 -70
  9. vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
  10. vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
  11. vectordb_bench/backend/clients/alloydb/cli.py +51 -34
  12. vectordb_bench/backend/clients/alloydb/config.py +30 -30
  13. vectordb_bench/backend/clients/api.py +5 -9
  14. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
  15. vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
  16. vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
  17. vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
  18. vectordb_bench/backend/clients/chroma/chroma.py +38 -36
  19. vectordb_bench/backend/clients/chroma/config.py +4 -2
  20. vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
  21. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
  22. vectordb_bench/backend/clients/memorydb/cli.py +8 -8
  23. vectordb_bench/backend/clients/memorydb/config.py +2 -2
  24. vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
  25. vectordb_bench/backend/clients/milvus/cli.py +41 -83
  26. vectordb_bench/backend/clients/milvus/config.py +18 -8
  27. vectordb_bench/backend/clients/milvus/milvus.py +18 -19
  28. vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
  29. vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
  30. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
  31. vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
  32. vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
  33. vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
  34. vectordb_bench/backend/clients/pgvector/cli.py +40 -31
  35. vectordb_bench/backend/clients/pgvector/config.py +63 -73
  36. vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
  37. vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
  38. vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
  39. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
  40. vectordb_bench/backend/clients/pinecone/config.py +1 -0
  41. vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
  42. vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
  43. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
  44. vectordb_bench/backend/clients/redis/cli.py +6 -12
  45. vectordb_bench/backend/clients/redis/config.py +7 -5
  46. vectordb_bench/backend/clients/redis/redis.py +94 -58
  47. vectordb_bench/backend/clients/test/cli.py +1 -2
  48. vectordb_bench/backend/clients/test/config.py +2 -2
  49. vectordb_bench/backend/clients/test/test.py +4 -5
  50. vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
  51. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
  52. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
  53. vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
  54. vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
  55. vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
  56. vectordb_bench/backend/data_source.py +30 -18
  57. vectordb_bench/backend/dataset.py +47 -27
  58. vectordb_bench/backend/result_collector.py +2 -3
  59. vectordb_bench/backend/runner/__init__.py +4 -6
  60. vectordb_bench/backend/runner/mp_runner.py +85 -34
  61. vectordb_bench/backend/runner/rate_runner.py +30 -19
  62. vectordb_bench/backend/runner/read_write_runner.py +51 -23
  63. vectordb_bench/backend/runner/serial_runner.py +91 -48
  64. vectordb_bench/backend/runner/util.py +4 -3
  65. vectordb_bench/backend/task_runner.py +92 -72
  66. vectordb_bench/backend/utils.py +17 -10
  67. vectordb_bench/base.py +0 -1
  68. vectordb_bench/cli/cli.py +65 -60
  69. vectordb_bench/cli/vectordbbench.py +6 -7
  70. vectordb_bench/frontend/components/check_results/charts.py +8 -19
  71. vectordb_bench/frontend/components/check_results/data.py +4 -16
  72. vectordb_bench/frontend/components/check_results/filters.py +8 -16
  73. vectordb_bench/frontend/components/check_results/nav.py +4 -4
  74. vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
  75. vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
  76. vectordb_bench/frontend/components/concurrent/charts.py +12 -12
  77. vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
  78. vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
  79. vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
  80. vectordb_bench/frontend/components/custom/initStyle.py +1 -1
  81. vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
  82. vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
  83. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
  84. vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
  85. vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
  86. vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
  87. vectordb_bench/frontend/components/tables/data.py +3 -6
  88. vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
  89. vectordb_bench/frontend/pages/concurrent.py +3 -5
  90. vectordb_bench/frontend/pages/custom.py +30 -9
  91. vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
  92. vectordb_bench/frontend/pages/run_test.py +3 -7
  93. vectordb_bench/frontend/utils.py +1 -1
  94. vectordb_bench/frontend/vdb_benchmark.py +4 -6
  95. vectordb_bench/interface.py +56 -26
  96. vectordb_bench/log_util.py +59 -64
  97. vectordb_bench/metric.py +10 -11
  98. vectordb_bench/models.py +26 -43
  99. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +22 -15
  100. vectordb_bench-0.0.20.dist-info/RECORD +135 -0
  101. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
  102. vectordb_bench-0.0.19.dist-info/RECORD +0 -135
  103. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
  104. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
  105. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,12 @@
1
1
  from enum import Enum
2
- from typing import Type
2
+
3
3
  from .api import (
4
- VectorDB,
5
- DBConfig,
6
4
  DBCaseConfig,
5
+ DBConfig,
7
6
  EmptyDBCaseConfig,
8
7
  IndexType,
9
8
  MetricType,
9
+ VectorDB,
10
10
  )
11
11
 
12
12
 
@@ -41,200 +41,255 @@ class DB(Enum):
41
41
  Test = "test"
42
42
  AliyunOpenSearch = "AliyunOpenSearch"
43
43
 
44
-
45
44
  @property
46
- def init_cls(self) -> Type[VectorDB]:
45
+ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912
47
46
  """Import while in use"""
48
47
  if self == DB.Milvus:
49
48
  from .milvus.milvus import Milvus
49
+
50
50
  return Milvus
51
51
 
52
52
  if self == DB.ZillizCloud:
53
53
  from .zilliz_cloud.zilliz_cloud import ZillizCloud
54
+
54
55
  return ZillizCloud
55
56
 
56
57
  if self == DB.Pinecone:
57
58
  from .pinecone.pinecone import Pinecone
59
+
58
60
  return Pinecone
59
61
 
60
62
  if self == DB.ElasticCloud:
61
63
  from .elastic_cloud.elastic_cloud import ElasticCloud
64
+
62
65
  return ElasticCloud
63
66
 
64
67
  if self == DB.QdrantCloud:
65
68
  from .qdrant_cloud.qdrant_cloud import QdrantCloud
69
+
66
70
  return QdrantCloud
67
71
 
68
72
  if self == DB.WeaviateCloud:
69
73
  from .weaviate_cloud.weaviate_cloud import WeaviateCloud
74
+
70
75
  return WeaviateCloud
71
76
 
72
77
  if self == DB.PgVector:
73
78
  from .pgvector.pgvector import PgVector
79
+
74
80
  return PgVector
75
81
 
76
82
  if self == DB.PgVectoRS:
77
83
  from .pgvecto_rs.pgvecto_rs import PgVectoRS
84
+
78
85
  return PgVectoRS
79
-
86
+
80
87
  if self == DB.PgVectorScale:
81
88
  from .pgvectorscale.pgvectorscale import PgVectorScale
89
+
82
90
  return PgVectorScale
83
91
 
84
92
  if self == DB.PgDiskANN:
85
93
  from .pgdiskann.pgdiskann import PgDiskANN
94
+
86
95
  return PgDiskANN
87
96
 
88
97
  if self == DB.Redis:
89
98
  from .redis.redis import Redis
99
+
90
100
  return Redis
91
-
101
+
92
102
  if self == DB.MemoryDB:
93
103
  from .memorydb.memorydb import MemoryDB
104
+
94
105
  return MemoryDB
95
106
 
96
107
  if self == DB.Chroma:
97
108
  from .chroma.chroma import ChromaClient
109
+
98
110
  return ChromaClient
99
111
 
100
112
  if self == DB.AWSOpenSearch:
101
113
  from .aws_opensearch.aws_opensearch import AWSOpenSearch
114
+
102
115
  return AWSOpenSearch
103
-
116
+
104
117
  if self == DB.AlloyDB:
105
118
  from .alloydb.alloydb import AlloyDB
119
+
106
120
  return AlloyDB
107
121
 
108
122
  if self == DB.AliyunElasticsearch:
109
123
  from .aliyun_elasticsearch.aliyun_elasticsearch import AliyunElasticsearch
124
+
110
125
  return AliyunElasticsearch
111
126
 
112
127
  if self == DB.AliyunOpenSearch:
113
128
  from .aliyun_opensearch.aliyun_opensearch import AliyunOpenSearch
129
+
114
130
  return AliyunOpenSearch
115
131
 
132
+ msg = f"Unknown DB: {self.name}"
133
+ raise ValueError(msg)
134
+
116
135
  @property
117
- def config_cls(self) -> Type[DBConfig]:
136
+ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912
118
137
  """Import while in use"""
119
138
  if self == DB.Milvus:
120
139
  from .milvus.config import MilvusConfig
140
+
121
141
  return MilvusConfig
122
142
 
123
143
  if self == DB.ZillizCloud:
124
144
  from .zilliz_cloud.config import ZillizCloudConfig
145
+
125
146
  return ZillizCloudConfig
126
147
 
127
148
  if self == DB.Pinecone:
128
149
  from .pinecone.config import PineconeConfig
150
+
129
151
  return PineconeConfig
130
152
 
131
153
  if self == DB.ElasticCloud:
132
154
  from .elastic_cloud.config import ElasticCloudConfig
155
+
133
156
  return ElasticCloudConfig
134
157
 
135
158
  if self == DB.QdrantCloud:
136
159
  from .qdrant_cloud.config import QdrantConfig
160
+
137
161
  return QdrantConfig
138
162
 
139
163
  if self == DB.WeaviateCloud:
140
164
  from .weaviate_cloud.config import WeaviateConfig
165
+
141
166
  return WeaviateConfig
142
167
 
143
168
  if self == DB.PgVector:
144
169
  from .pgvector.config import PgVectorConfig
170
+
145
171
  return PgVectorConfig
146
172
 
147
173
  if self == DB.PgVectoRS:
148
174
  from .pgvecto_rs.config import PgVectoRSConfig
175
+
149
176
  return PgVectoRSConfig
150
177
 
151
178
  if self == DB.PgVectorScale:
152
179
  from .pgvectorscale.config import PgVectorScaleConfig
180
+
153
181
  return PgVectorScaleConfig
154
182
 
155
183
  if self == DB.PgDiskANN:
156
184
  from .pgdiskann.config import PgDiskANNConfig
185
+
157
186
  return PgDiskANNConfig
158
187
 
159
188
  if self == DB.Redis:
160
189
  from .redis.config import RedisConfig
190
+
161
191
  return RedisConfig
162
-
192
+
163
193
  if self == DB.MemoryDB:
164
194
  from .memorydb.config import MemoryDBConfig
195
+
165
196
  return MemoryDBConfig
166
197
 
167
198
  if self == DB.Chroma:
168
199
  from .chroma.config import ChromaConfig
200
+
169
201
  return ChromaConfig
170
202
 
171
203
  if self == DB.AWSOpenSearch:
172
204
  from .aws_opensearch.config import AWSOpenSearchConfig
205
+
173
206
  return AWSOpenSearchConfig
174
-
207
+
175
208
  if self == DB.AlloyDB:
176
209
  from .alloydb.config import AlloyDBConfig
210
+
177
211
  return AlloyDBConfig
178
212
 
179
213
  if self == DB.AliyunElasticsearch:
180
214
  from .aliyun_elasticsearch.config import AliyunElasticsearchConfig
215
+
181
216
  return AliyunElasticsearchConfig
182
217
 
183
218
  if self == DB.AliyunOpenSearch:
184
219
  from .aliyun_opensearch.config import AliyunOpenSearchConfig
220
+
185
221
  return AliyunOpenSearchConfig
186
222
 
187
- def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseConfig]:
223
+ msg = f"Unknown DB: {self.name}"
224
+ raise ValueError(msg)
225
+
226
+ def case_config_cls( # noqa: PLR0911
227
+ self,
228
+ index_type: IndexType | None = None,
229
+ ) -> type[DBCaseConfig]:
188
230
  if self == DB.Milvus:
189
231
  from .milvus.config import _milvus_case_config
232
+
190
233
  return _milvus_case_config.get(index_type)
191
234
 
192
235
  if self == DB.ZillizCloud:
193
236
  from .zilliz_cloud.config import AutoIndexConfig
237
+
194
238
  return AutoIndexConfig
195
239
 
196
240
  if self == DB.ElasticCloud:
197
241
  from .elastic_cloud.config import ElasticCloudIndexConfig
242
+
198
243
  return ElasticCloudIndexConfig
199
244
 
200
245
  if self == DB.QdrantCloud:
201
246
  from .qdrant_cloud.config import QdrantIndexConfig
247
+
202
248
  return QdrantIndexConfig
203
249
 
204
250
  if self == DB.WeaviateCloud:
205
251
  from .weaviate_cloud.config import WeaviateIndexConfig
252
+
206
253
  return WeaviateIndexConfig
207
254
 
208
255
  if self == DB.PgVector:
209
256
  from .pgvector.config import _pgvector_case_config
257
+
210
258
  return _pgvector_case_config.get(index_type)
211
259
 
212
260
  if self == DB.PgVectoRS:
213
261
  from .pgvecto_rs.config import _pgvecto_rs_case_config
262
+
214
263
  return _pgvecto_rs_case_config.get(index_type)
215
264
 
216
265
  if self == DB.AWSOpenSearch:
217
266
  from .aws_opensearch.config import AWSOpenSearchIndexConfig
267
+
218
268
  return AWSOpenSearchIndexConfig
219
269
 
220
270
  if self == DB.PgVectorScale:
221
271
  from .pgvectorscale.config import _pgvectorscale_case_config
272
+
222
273
  return _pgvectorscale_case_config.get(index_type)
223
274
 
224
275
  if self == DB.PgDiskANN:
225
276
  from .pgdiskann.config import _pgdiskann_case_config
277
+
226
278
  return _pgdiskann_case_config.get(index_type)
227
-
279
+
228
280
  if self == DB.AlloyDB:
229
281
  from .alloydb.config import _alloydb_case_config
282
+
230
283
  return _alloydb_case_config.get(index_type)
231
284
 
232
285
  if self == DB.AliyunElasticsearch:
233
286
  from .elastic_cloud.config import ElasticCloudIndexConfig
287
+
234
288
  return ElasticCloudIndexConfig
235
289
 
236
290
  if self == DB.AliyunOpenSearch:
237
291
  from .aliyun_opensearch.config import AliyunOpenSearchIndexConfig
292
+
238
293
  return AliyunOpenSearchIndexConfig
239
294
 
240
295
  # DB.Pinecone, DB.Chroma, DB.Redis
@@ -242,5 +297,11 @@ class DB(Enum):
242
297
 
243
298
 
244
299
  __all__ = [
245
- "DB", "VectorDB", "DBConfig", "DBCaseConfig", "IndexType", "MetricType", "EmptyDBCaseConfig",
300
+ "DB",
301
+ "DBCaseConfig",
302
+ "DBConfig",
303
+ "EmptyDBCaseConfig",
304
+ "IndexType",
305
+ "MetricType",
306
+ "VectorDB",
246
307
  ]
@@ -1,5 +1,5 @@
1
- from ..elastic_cloud.elastic_cloud import ElasticCloud
2
1
  from ..elastic_cloud.config import ElasticCloudIndexConfig
2
+ from ..elastic_cloud.elastic_cloud import ElasticCloud
3
3
 
4
4
 
5
5
  class AliyunElasticsearch(ElasticCloud):
@@ -24,4 +24,3 @@ class AliyunElasticsearch(ElasticCloud):
24
24
  drop_old=drop_old,
25
25
  **kwargs,
26
26
  )
27
-
@@ -1,7 +1,6 @@
1
- from enum import Enum
2
- from pydantic import SecretStr, BaseModel
1
+ from pydantic import BaseModel, SecretStr
3
2
 
4
- from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
3
+ from ..api import DBConfig
5
4
 
6
5
 
7
6
  class AliyunElasticsearchConfig(DBConfig, BaseModel):
@@ -14,6 +13,6 @@ class AliyunElasticsearchConfig(DBConfig, BaseModel):
14
13
 
15
14
  def to_dict(self) -> dict:
16
15
  return {
17
- "hosts": [{'scheme': self.scheme, 'host': self.host, 'port': self.port}],
16
+ "hosts": [{"scheme": self.scheme, "host": self.host, "port": self.port}],
18
17
  "basic_auth": (self.user, self.password.get_secret_value()),
19
18
  }
@@ -1,32 +1,32 @@
1
1
  import json
2
2
  import logging
3
- from contextlib import contextmanager
4
3
  import time
4
+ from contextlib import contextmanager
5
5
 
6
+ from alibabacloud_ha3engine_vector import client, models
6
7
  from alibabacloud_ha3engine_vector.models import QueryRequest
7
-
8
- from ..api import VectorDB, MetricType
9
- from .config import AliyunOpenSearchIndexConfig
10
-
11
- from alibabacloud_searchengine20211025.client import Client as searchengineClient
12
8
  from alibabacloud_searchengine20211025 import models as searchengine_models
9
+ from alibabacloud_searchengine20211025.client import Client as searchengineClient
13
10
  from alibabacloud_tea_openapi import models as open_api_models
14
- from alibabacloud_ha3engine_vector import models, client
11
+
12
+ from ..api import MetricType, VectorDB
13
+ from .config import AliyunOpenSearchIndexConfig
15
14
 
16
15
  log = logging.getLogger(__name__)
17
16
 
18
17
  ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024 # 2MB
19
18
  ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH = 100
20
19
 
20
+
21
21
  class AliyunOpenSearch(VectorDB):
22
22
  def __init__(
23
- self,
24
- dim: int,
25
- db_config: dict,
26
- db_case_config: AliyunOpenSearchIndexConfig,
27
- collection_name: str = "VectorDBBenchCollection",
28
- drop_old: bool = False,
29
- **kwargs,
23
+ self,
24
+ dim: int,
25
+ db_config: dict,
26
+ db_case_config: AliyunOpenSearchIndexConfig,
27
+ collection_name: str = "VectorDBBenchCollection",
28
+ drop_old: bool = False,
29
+ **kwargs,
30
30
  ):
31
31
  self.control_client = None
32
32
  self.dim = dim
@@ -41,14 +41,17 @@ class AliyunOpenSearch(VectorDB):
41
41
  self._index_name = "vector_idx"
42
42
 
43
43
  self.batch_size = int(
44
- min(ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH / (dim * 25), ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH)
44
+ min(
45
+ ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH / (dim * 25),
46
+ ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH,
47
+ ),
45
48
  )
46
49
 
47
50
  log.info(f"Aliyun_OpenSearch client config: {self.db_config}")
48
51
  control_config = open_api_models.Config(
49
52
  access_key_id=self.db_config["ak"],
50
53
  access_key_secret=self.db_config["sk"],
51
- endpoint=self.db_config["control_host"]
54
+ endpoint=self.db_config["control_host"],
52
55
  )
53
56
  self.control_client = searchengineClient(control_config)
54
57
 
@@ -67,7 +70,7 @@ class AliyunOpenSearch(VectorDB):
67
70
  create_table_request.field_schema = {
68
71
  self._primary_field: "INT64",
69
72
  self._vector_field: "MULTI_FLOAT",
70
- self._scalar_field: "INT64"
73
+ self._scalar_field: "INT64",
71
74
  }
72
75
  vector_index = searchengine_models.ModifyTableRequestVectorIndex()
73
76
  vector_index.index_name = self._index_name
@@ -77,8 +80,25 @@ class AliyunOpenSearch(VectorDB):
77
80
  vector_index.vector_index_type = "HNSW"
78
81
 
79
82
  advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
80
- advance_params.build_index_params = "{\"proxima.hnsw.builder.max_neighbor_count\":" + str(self.case_config.M) + ",\"proxima.hnsw.builder.efconstruction\":" + str(self.case_config.efConstruction) + ",\"proxima.hnsw.builder.enable_adsampling\":true,\"proxima.hnsw.builder.slack_pruning_factor\":1.1,\"proxima.hnsw.builder.thread_count\":16}"
81
- advance_params.search_index_params = "{\"proxima.hnsw.searcher.ef\":400,\"proxima.hnsw.searcher.dynamic_termination.prob_threshold\":0.7}"
83
+ str_max_neighbor_count = f'"proxima.hnsw.builder.max_neighbor_count":{self.case_config.M}'
84
+ str_efc = f'"proxima.hnsw.builder.efconstruction":{self.case_config.ef_construction}'
85
+ str_enable_adsampling = '"proxima.hnsw.builder.enable_adsampling":true'
86
+ str_slack_pruning_factor = '"proxima.hnsw.builder.slack_pruning_factor":1.1'
87
+ str_thread_count = '"proxima.hnsw.builder.thread_count":16'
88
+
89
+ params = ",".join(
90
+ [
91
+ str_max_neighbor_count,
92
+ str_efc,
93
+ str_enable_adsampling,
94
+ str_slack_pruning_factor,
95
+ str_thread_count,
96
+ ],
97
+ )
98
+ advance_params.build_index_params = params
99
+ advance_params.search_index_params = (
100
+ '{"proxima.hnsw.searcher.ef":400,"proxima.hnsw.searcher.dynamic_termination.prob_threshold":0.7}'
101
+ )
82
102
  vector_index.advance_params = advance_params
83
103
  create_table_request.vector_index = [vector_index]
84
104
 
@@ -88,7 +108,7 @@ class AliyunOpenSearch(VectorDB):
88
108
  except Exception as error:
89
109
  log.info(error.message)
90
110
  log.info(error.data.get("Recommend"))
91
- log.info(f"Failed to create index: error: {str(error)}")
111
+ log.info(f"Failed to create index: error: {error!s}")
92
112
  raise error from None
93
113
 
94
114
  # check if index create success
@@ -102,22 +122,22 @@ class AliyunOpenSearch(VectorDB):
102
122
  log.info(f"begin to {retry_times} times get table")
103
123
  retry_times += 1
104
124
  response = client.get_table(self.instance_id, self.collection_name)
105
- if response.body.result.status == 'IN_USE':
125
+ if response.body.result.status == "IN_USE":
106
126
  log.info(f"{self.collection_name} table begin to use.")
107
127
  return
108
128
 
109
129
  def _index_exists(self, client: searchengineClient) -> bool:
110
130
  try:
111
131
  client.get_table(self.instance_id, self.collection_name)
112
- return True
113
- except Exception as error:
114
- log.info(f'get table from searchengine error')
115
- log.info(error.message)
132
+ except Exception as err:
133
+ log.warning(f"get table from searchengine error, err={err}")
116
134
  return False
135
+ else:
136
+ return True
117
137
 
118
138
  # check if index build success, Insert the embeddings to the vector database after index build success
119
139
  def _index_build_success(self, client: searchengineClient) -> None:
120
- log.info(f"begin to check if table build success.")
140
+ log.info("begin to check if table build success.")
121
141
  time.sleep(50)
122
142
 
123
143
  retry_times = 0
@@ -139,9 +159,9 @@ class AliyunOpenSearch(VectorDB):
139
159
  cur_fsm = fsm
140
160
  break
141
161
  if cur_fsm is None:
142
- print("no build index fsm")
162
+ log.warning("no build index fsm")
143
163
  return
144
- if "success" == cur_fsm["status"]:
164
+ if cur_fsm["status"] == "success":
145
165
  return
146
166
 
147
167
  def _modify_index(self, client: searchengineClient) -> None:
@@ -154,7 +174,7 @@ class AliyunOpenSearch(VectorDB):
154
174
  modify_table_request.field_schema = {
155
175
  self._primary_field: "INT64",
156
176
  self._vector_field: "MULTI_FLOAT",
157
- self._scalar_field: "INT64"
177
+ self._scalar_field: "INT64",
158
178
  }
159
179
  vector_index = searchengine_models.ModifyTableRequestVectorIndex()
160
180
  vector_index.index_name = self._index_name
@@ -163,19 +183,41 @@ class AliyunOpenSearch(VectorDB):
163
183
  vector_index.vector_field = self._vector_field
164
184
  vector_index.vector_index_type = "HNSW"
165
185
  advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
166
- advance_params.build_index_params = "{\"proxima.hnsw.builder.max_neighbor_count\":" + str(self.case_config.M) + ",\"proxima.hnsw.builder.efconstruction\":" + str(self.case_config.efConstruction) + ",\"proxima.hnsw.builder.enable_adsampling\":true,\"proxima.hnsw.builder.slack_pruning_factor\":1.1,\"proxima.hnsw.builder.thread_count\":16}"
167
- advance_params.search_index_params = "{\"proxima.hnsw.searcher.ef\":400,\"proxima.hnsw.searcher.dynamic_termination.prob_threshold\":0.7}"
186
+
187
+ str_max_neighbor_count = f'"proxima.hnsw.builder.max_neighbor_count":{self.case_config.M}'
188
+ str_efc = f'"proxima.hnsw.builder.efconstruction":{self.case_config.ef_construction}'
189
+ str_enable_adsampling = '"proxima.hnsw.builder.enable_adsampling":true'
190
+ str_slack_pruning_factor = '"proxima.hnsw.builder.slack_pruning_factor":1.1'
191
+ str_thread_count = '"proxima.hnsw.builder.thread_count":16'
192
+
193
+ params = ",".join(
194
+ [
195
+ str_max_neighbor_count,
196
+ str_efc,
197
+ str_enable_adsampling,
198
+ str_slack_pruning_factor,
199
+ str_thread_count,
200
+ ],
201
+ )
202
+ advance_params.build_index_params = params
203
+ advance_params.search_index_params = (
204
+ '{"proxima.hnsw.searcher.ef":400,"proxima.hnsw.searcher.dynamic_termination.prob_threshold":0.7}'
205
+ )
168
206
  vector_index.advance_params = advance_params
169
207
 
170
208
  modify_table_request.vector_index = [vector_index]
171
209
 
172
210
  try:
173
- response = client.modify_table(self.instance_id, self.collection_name, modify_table_request)
211
+ response = client.modify_table(
212
+ self.instance_id,
213
+ self.collection_name,
214
+ modify_table_request,
215
+ )
174
216
  log.info(f"modify table success: {response.body}")
175
217
  except Exception as error:
176
218
  log.info(error.message)
177
219
  log.info(error.data.get("Recommend"))
178
- log.info(f"Failed to modify index: error: {str(error)}")
220
+ log.info(f"Failed to modify index: error: {error!s}")
179
221
  raise error from None
180
222
 
181
223
  # check if modify index & delete data fsm success
@@ -185,15 +227,14 @@ class AliyunOpenSearch(VectorDB):
185
227
  def _get_total_count(self):
186
228
  try:
187
229
  response = self.client.stats(self.collection_name)
230
+ except Exception as e:
231
+ log.warning(f"Error querying index: {e}")
232
+ else:
188
233
  body = json.loads(response.body)
189
234
  log.info(f"stats info: {response.body}")
190
235
 
191
236
  if "result" in body and "totalDocCount" in body.get("result"):
192
237
  return body.get("result").get("totalDocCount")
193
- else:
194
- return 0
195
- except Exception as e:
196
- print(f"Error querying index: {e}")
197
238
  return 0
198
239
 
199
240
  @contextmanager
@@ -203,21 +244,20 @@ class AliyunOpenSearch(VectorDB):
203
244
  endpoint=self.db_config["host"],
204
245
  protocol="http",
205
246
  access_user_name=self.db_config["user"],
206
- access_pass_word=self.db_config["password"]
247
+ access_pass_word=self.db_config["password"],
207
248
  )
208
249
 
209
250
  self.client = client.Client(config)
210
251
 
211
252
  yield
212
- # self.client.transport.close()
213
253
  self.client = None
214
254
  del self.client
215
255
 
216
256
  def insert_embeddings(
217
- self,
218
- embeddings: list[list[float]],
219
- metadata: list[int],
220
- **kwargs,
257
+ self,
258
+ embeddings: list[list[float]],
259
+ metadata: list[int],
260
+ **kwargs,
221
261
  ) -> tuple[int, Exception]:
222
262
  """Insert the embeddings to the opensearch."""
223
263
  assert self.client is not None, "should self.init() first"
@@ -226,25 +266,24 @@ class AliyunOpenSearch(VectorDB):
226
266
 
227
267
  try:
228
268
  for batch_start_offset in range(0, len(embeddings), self.batch_size):
229
- batch_end_offset = min(
230
- batch_start_offset + self.batch_size, len(embeddings)
231
- )
269
+ batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
232
270
  documents = []
233
271
  for i in range(batch_start_offset, batch_end_offset):
234
- documentFields = {
272
+ document_fields = {
235
273
  self._primary_field: metadata[i],
236
274
  self._vector_field: embeddings[i],
237
275
  self._scalar_field: metadata[i],
238
- "ops_build_channel": "inc"
239
- }
240
- document = {
241
- "fields": documentFields,
242
- "cmd": "add"
276
+ "ops_build_channel": "inc",
243
277
  }
278
+ document = {"fields": document_fields, "cmd": "add"}
244
279
  documents.append(document)
245
280
 
246
- pushDocumentsRequest = models.PushDocumentsRequest({}, documents)
247
- self.client.push_documents(self.collection_name, self._primary_field, pushDocumentsRequest)
281
+ push_doc_req = models.PushDocumentsRequest({}, documents)
282
+ self.client.push_documents(
283
+ self.collection_name,
284
+ self._primary_field,
285
+ push_doc_req,
286
+ )
248
287
  insert_count += batch_end_offset - batch_start_offset
249
288
  except Exception as e:
250
289
  log.info(f"Failed to insert data: {e}")
@@ -252,33 +291,36 @@ class AliyunOpenSearch(VectorDB):
252
291
  return (insert_count, None)
253
292
 
254
293
  def search_embedding(
255
- self,
256
- query: list[float],
257
- k: int = 100,
258
- filters: dict | None = None,
294
+ self,
295
+ query: list[float],
296
+ k: int = 100,
297
+ filters: dict | None = None,
259
298
  ) -> list[int]:
260
299
  assert self.client is not None, "should self.init() first"
261
- search_params = "{\"proxima.hnsw.searcher.ef\":"+ str(self.case_config.ef_search) +"}"
300
+ search_params = '{"proxima.hnsw.searcher.ef":' + str(self.case_config.ef_search) + "}"
262
301
 
263
302
  os_filter = f"{self._scalar_field} {filters.get('metadata')}" if filters else ""
264
303
 
265
304
  try:
266
- request = QueryRequest(table_name=self.collection_name,
267
- vector=query,
268
- top_k=k,
269
- search_params=search_params, filter=os_filter)
305
+ request = QueryRequest(
306
+ table_name=self.collection_name,
307
+ vector=query,
308
+ top_k=k,
309
+ search_params=search_params,
310
+ filter=os_filter,
311
+ )
270
312
  result = self.client.query(request)
271
313
  except Exception as e:
272
314
  log.info(f"Error querying index: {e}")
273
- raise e
274
- res = json.loads(result.body)
275
- id_res = [one_res["id"] for one_res in res["result"]]
276
- return id_res
315
+ raise e from e
316
+ else:
317
+ res = json.loads(result.body)
318
+ return [one_res["id"] for one_res in res["result"]]
277
319
 
278
320
  def need_normalize_cosine(self) -> bool:
279
321
  """Wheather this database need to normalize dataset to support COSINE"""
280
322
  if self.case_config.metric_type == MetricType.COSINE:
281
- log.info(f"cosine dataset need normalize.")
323
+ log.info("cosine dataset need normalize.")
282
324
  return True
283
325
 
284
326
  return False
@@ -296,9 +338,8 @@ class AliyunOpenSearch(VectorDB):
296
338
  total_count = self._get_total_count()
297
339
  # check if the data is inserted
298
340
  if total_count == data_size:
299
- log.info(f"optimize table finish.")
341
+ log.info("optimize table finish.")
300
342
  return
301
343
 
302
344
  def ready_to_load(self):
303
345
  """ready_to_load will be called before load in load cases."""
304
- pass