vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +85 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +13 -24
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +39 -40
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +19 -39
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +95 -62
- vectordb_bench/backend/clients/test/cli.py +2 -3
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +5 -9
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +18 -14
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +56 -23
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +46 -22
- vectordb_bench/backend/runner/serial_runner.py +81 -46
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -92
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +45 -24
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.21.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py

@@ -1,32 +1,32 @@
 import json
 import logging
-from contextlib import contextmanager
 import time
+from contextlib import contextmanager
 
+from alibabacloud_ha3engine_vector import client, models
 from alibabacloud_ha3engine_vector.models import QueryRequest
-
-from ..api import VectorDB, MetricType
-from .config import AliyunOpenSearchIndexConfig
-
-from alibabacloud_searchengine20211025.client import Client as searchengineClient
 from alibabacloud_searchengine20211025 import models as searchengine_models
+from alibabacloud_searchengine20211025.client import Client as searchengineClient
 from alibabacloud_tea_openapi import models as open_api_models
-
+
+from ..api import MetricType, VectorDB
+from .config import AliyunOpenSearchIndexConfig
 
 log = logging.getLogger(__name__)
 
 ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024  # 2MB
 ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH = 100
 
+
 class AliyunOpenSearch(VectorDB):
     def __init__(
-
-
-
-
-
-
-
+        self,
+        dim: int,
+        db_config: dict,
+        db_case_config: AliyunOpenSearchIndexConfig,
+        collection_name: str = "VectorDBBenchCollection",
+        drop_old: bool = False,
+        **kwargs,
     ):
         self.control_client = None
         self.dim = dim
@@ -41,14 +41,17 @@ class AliyunOpenSearch(VectorDB):
         self._index_name = "vector_idx"
 
         self.batch_size = int(
-            min(
+            min(
+                ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH / (dim * 25),
+                ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH,
+            ),
         )
 
         log.info(f"Aliyun_OpenSearch client config: {self.db_config}")
         control_config = open_api_models.Config(
             access_key_id=self.db_config["ak"],
             access_key_secret=self.db_config["sk"],
-            endpoint=self.db_config["control_host"]
+            endpoint=self.db_config["control_host"],
         )
         self.control_client = searchengineClient(control_config)
 
@@ -67,7 +70,7 @@ class AliyunOpenSearch(VectorDB):
         create_table_request.field_schema = {
             self._primary_field: "INT64",
             self._vector_field: "MULTI_FLOAT",
-            self._scalar_field: "INT64"
+            self._scalar_field: "INT64",
         }
         vector_index = searchengine_models.ModifyTableRequestVectorIndex()
         vector_index.index_name = self._index_name
@@ -77,8 +80,25 @@ class AliyunOpenSearch(VectorDB):
         vector_index.vector_index_type = "HNSW"
 
         advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
-
-
+        str_max_neighbor_count = f'"proxima.hnsw.builder.max_neighbor_count":{self.case_config.M}'
+        str_efc = f'"proxima.hnsw.builder.efconstruction":{self.case_config.ef_construction}'
+        str_enable_adsampling = '"proxima.hnsw.builder.enable_adsampling":true'
+        str_slack_pruning_factor = '"proxima.hnsw.builder.slack_pruning_factor":1.1'
+        str_thread_count = '"proxima.hnsw.builder.thread_count":16'
+
+        params = ",".join(
+            [
+                str_max_neighbor_count,
+                str_efc,
+                str_enable_adsampling,
+                str_slack_pruning_factor,
+                str_thread_count,
+            ],
+        )
+        advance_params.build_index_params = params
+        advance_params.search_index_params = (
+            '{"proxima.hnsw.searcher.ef":400,"proxima.hnsw.searcher.dynamic_termination.prob_threshold":0.7}'
+        )
         vector_index.advance_params = advance_params
         create_table_request.vector_index = [vector_index]
 
@@ -88,7 +108,7 @@ class AliyunOpenSearch(VectorDB):
         except Exception as error:
             log.info(error.message)
             log.info(error.data.get("Recommend"))
-            log.info(f"Failed to create index: error: {
+            log.info(f"Failed to create index: error: {error!s}")
             raise error from None
 
         # check if index create success
@@ -102,22 +122,22 @@ class AliyunOpenSearch(VectorDB):
             log.info(f"begin to {retry_times} times get table")
             retry_times += 1
             response = client.get_table(self.instance_id, self.collection_name)
-            if response.body.result.status ==
+            if response.body.result.status == "IN_USE":
                 log.info(f"{self.collection_name} table begin to use.")
                 return
 
     def _index_exists(self, client: searchengineClient) -> bool:
         try:
             client.get_table(self.instance_id, self.collection_name)
-
-
-            log.info(f'get table from searchengine error')
-            log.info(error.message)
+        except Exception as err:
+            log.warning(f"get table from searchengine error, err={err}")
             return False
+        else:
+            return True
 
     # check if index build success, Insert the embeddings to the vector database after index build success
     def _index_build_success(self, client: searchengineClient) -> None:
-        log.info(
+        log.info("begin to check if table build success.")
         time.sleep(50)
 
         retry_times = 0
@@ -139,9 +159,9 @@ class AliyunOpenSearch(VectorDB):
                cur_fsm = fsm
                break
        if cur_fsm is None:
-
+            log.warning("no build index fsm")
            return
-        if "
+        if cur_fsm["status"] == "success":
            return
 
     def _modify_index(self, client: searchengineClient) -> None:
@@ -154,7 +174,7 @@ class AliyunOpenSearch(VectorDB):
         modify_table_request.field_schema = {
             self._primary_field: "INT64",
             self._vector_field: "MULTI_FLOAT",
-            self._scalar_field: "INT64"
+            self._scalar_field: "INT64",
         }
         vector_index = searchengine_models.ModifyTableRequestVectorIndex()
         vector_index.index_name = self._index_name
@@ -163,19 +183,41 @@ class AliyunOpenSearch(VectorDB):
         vector_index.vector_field = self._vector_field
         vector_index.vector_index_type = "HNSW"
         advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
-
-
+
+        str_max_neighbor_count = f'"proxima.hnsw.builder.max_neighbor_count":{self.case_config.M}'
+        str_efc = f'"proxima.hnsw.builder.efconstruction":{self.case_config.ef_construction}'
+        str_enable_adsampling = '"proxima.hnsw.builder.enable_adsampling":true'
+        str_slack_pruning_factor = '"proxima.hnsw.builder.slack_pruning_factor":1.1'
+        str_thread_count = '"proxima.hnsw.builder.thread_count":16'
+
+        params = ",".join(
+            [
+                str_max_neighbor_count,
+                str_efc,
+                str_enable_adsampling,
+                str_slack_pruning_factor,
+                str_thread_count,
+            ],
+        )
+        advance_params.build_index_params = params
+        advance_params.search_index_params = (
+            '{"proxima.hnsw.searcher.ef":400,"proxima.hnsw.searcher.dynamic_termination.prob_threshold":0.7}'
+        )
         vector_index.advance_params = advance_params
 
         modify_table_request.vector_index = [vector_index]
 
         try:
-            response = client.modify_table(
+            response = client.modify_table(
+                self.instance_id,
+                self.collection_name,
+                modify_table_request,
+            )
             log.info(f"modify table success: {response.body}")
         except Exception as error:
             log.info(error.message)
             log.info(error.data.get("Recommend"))
-            log.info(f"Failed to modify index: error: {
+            log.info(f"Failed to modify index: error: {error!s}")
             raise error from None
 
         # check if modify index & delete data fsm success
@@ -185,15 +227,14 @@ class AliyunOpenSearch(VectorDB):
     def _get_total_count(self):
         try:
             response = self.client.stats(self.collection_name)
+        except Exception as e:
+            log.warning(f"Error querying index: {e}")
+        else:
             body = json.loads(response.body)
             log.info(f"stats info: {response.body}")
 
             if "result" in body and "totalDocCount" in body.get("result"):
                 return body.get("result").get("totalDocCount")
-            else:
-                return 0
-        except Exception as e:
-            print(f"Error querying index: {e}")
             return 0
 
     @contextmanager
@@ -203,21 +244,20 @@ class AliyunOpenSearch(VectorDB):
             endpoint=self.db_config["host"],
             protocol="http",
             access_user_name=self.db_config["user"],
-            access_pass_word=self.db_config["password"]
+            access_pass_word=self.db_config["password"],
         )
 
         self.client = client.Client(config)
 
         yield
-        # self.client.transport.close()
         self.client = None
         del self.client
 
     def insert_embeddings(
-
-
-
-
+        self,
+        embeddings: list[list[float]],
+        metadata: list[int],
+        **kwargs,
    ) -> tuple[int, Exception]:
         """Insert the embeddings to the opensearch."""
         assert self.client is not None, "should self.init() first"
@@ -226,25 +266,24 @@ class AliyunOpenSearch(VectorDB):
 
         try:
             for batch_start_offset in range(0, len(embeddings), self.batch_size):
-                batch_end_offset = min(
-                    batch_start_offset + self.batch_size, len(embeddings)
-                )
+                batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
                 documents = []
                 for i in range(batch_start_offset, batch_end_offset):
-
+                    document_fields = {
                         self._primary_field: metadata[i],
                         self._vector_field: embeddings[i],
                         self._scalar_field: metadata[i],
-                        "ops_build_channel": "inc"
-                    }
-                    document = {
-                        "fields": documentFields,
-                        "cmd": "add"
+                        "ops_build_channel": "inc",
                     }
+                    document = {"fields": document_fields, "cmd": "add"}
                     documents.append(document)
 
-
-                self.client.push_documents(
+                push_doc_req = models.PushDocumentsRequest({}, documents)
+                self.client.push_documents(
+                    self.collection_name,
+                    self._primary_field,
+                    push_doc_req,
+                )
                 insert_count += batch_end_offset - batch_start_offset
         except Exception as e:
             log.info(f"Failed to insert data: {e}")
@@ -252,41 +291,41 @@ class AliyunOpenSearch(VectorDB):
         return (insert_count, None)
 
     def search_embedding(
-
-
-
-
+        self,
+        query: list[float],
+        k: int = 100,
+        filters: dict | None = None,
    ) -> list[int]:
         assert self.client is not None, "should self.init() first"
-        search_params =
+        search_params = '{"proxima.hnsw.searcher.ef":' + str(self.case_config.ef_search) + "}"
 
         os_filter = f"{self._scalar_field} {filters.get('metadata')}" if filters else ""
 
         try:
-            request = QueryRequest(
-
-
-
+            request = QueryRequest(
+                table_name=self.collection_name,
+                vector=query,
+                top_k=k,
+                search_params=search_params,
+                filter=os_filter,
+            )
             result = self.client.query(request)
         except Exception as e:
             log.info(f"Error querying index: {e}")
-            raise e
-
-
-
+            raise e from e
+        else:
+            res = json.loads(result.body)
+            return [one_res["id"] for one_res in res["result"]]
 
     def need_normalize_cosine(self) -> bool:
         """Wheather this database need to normalize dataset to support COSINE"""
         if self.case_config.metric_type == MetricType.COSINE:
-            log.info(
+            log.info("cosine dataset need normalize.")
             return True
 
         return False
 
-    def optimize(self):
-        pass
-
-    def optimize_with_size(self, data_size: int):
+    def optimize(self, data_size: int):
         log.info(f"optimize count: {data_size}")
         retry_times = 0
         while True:
@@ -296,9 +335,5 @@ class AliyunOpenSearch(VectorDB):
             total_count = self._get_total_count()
             # check if the data is inserted
             if total_count == data_size:
-                log.info(
+                log.info("optimize table finish.")
                 return
-
-    def ready_to_load(self):
-        """ready_to_load will be called before load in load cases."""
-        pass
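
For orientation, the two pieces of arithmetic this change introduces can be read on their own: the client-side batch sizing and the comma-joined HNSW build-parameter string. The sketch below restates them outside the class; the helper names (batch_size_for, hnsw_build_params) and the reading of dim * 25 as an estimate of the serialized bytes per vector are illustrative, not part of the package.

# Minimal sketch of the 0.0.21 batch sizing and HNSW build-parameter string.
# Only the formulas and literal values come from the diff above; the helper
# names and the bytes-per-vector interpretation are assumptions.

ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024  # 2MB per push_documents call
ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH = 100  # document cap per call


def batch_size_for(dim: int) -> int:
    # Size-derived limit (payload cap / estimated bytes per vector),
    # capped by the per-call document limit.
    return int(min(ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH / (dim * 25), ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH))


def hnsw_build_params(m: int, ef_construction: int) -> str:
    # Same comma-joined fragments the client assigns to advance_params.build_index_params.
    return ",".join(
        [
            f'"proxima.hnsw.builder.max_neighbor_count":{m}',
            f'"proxima.hnsw.builder.efconstruction":{ef_construction}',
            '"proxima.hnsw.builder.enable_adsampling":true',
            '"proxima.hnsw.builder.slack_pruning_factor":1.1',
            '"proxima.hnsw.builder.thread_count":16',
        ],
    )


print(batch_size_for(768))   # 2_097_152 / 19_200 ≈ 109.2 -> capped at 100
print(batch_size_for(1536))  # 2_097_152 / 38_400 ≈ 54.6 -> 54
print(hnsw_build_params(100, 500))  # defaults from AliyunOpenSearchIndexConfig

With the config defaults (M=100, ef_construction=500), inserts for a 768-dimensional dataset go out in batches of 100 documents, while higher-dimensional datasets fall back to the size-derived limit.
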
vectordb_bench/backend/clients/aliyun_opensearch/config.py

@@ -1,8 +1,8 @@
 import logging
-from enum import Enum
-from pydantic import SecretStr, BaseModel
 
-from
+from pydantic import BaseModel, SecretStr
+
+from ..api import DBCaseConfig, DBConfig, MetricType
 
 log = logging.getLogger(__name__)
 
@@ -26,18 +26,17 @@ class AliyunOpenSearchConfig(DBConfig, BaseModel):
             "control_host": self.control_host,
         }
 
+
 class AliyunOpenSearchIndexConfig(BaseModel, DBCaseConfig):
     metric_type: MetricType = MetricType.L2
-
+    ef_construction: int = 500
     M: int = 100
     ef_search: int = 40
 
     def distance_type(self) -> str:
         if self.metric_type == MetricType.L2:
             return "SquaredEuclidean"
-
-            return "InnerProduct"
-        elif self.metric_type == MetricType.COSINE:
+        if self.metric_type in (MetricType.IP, MetricType.COSINE):
             return "InnerProduct"
         return "SquaredEuclidean"
 
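
As a quick check of the reworked distance_type() mapping, the snippet below (a hypothetical usage sketch; import paths assumed from the package layout in the file list above) shows that L2 resolves to SquaredEuclidean while IP and COSINE both resolve to InnerProduct; cosine is supported by normalizing the dataset first, as need_normalize_cosine() in the client diff indicates.

# Hypothetical usage sketch; import paths follow the file list above.
from vectordb_bench.backend.clients.aliyun_opensearch.config import AliyunOpenSearchIndexConfig
from vectordb_bench.backend.clients.api import MetricType

# Defaults visible in the diff: ef_construction=500, M=100, ef_search=40.
l2_cfg = AliyunOpenSearchIndexConfig(metric_type=MetricType.L2)
cos_cfg = AliyunOpenSearchIndexConfig(metric_type=MetricType.COSINE)

assert l2_cfg.distance_type() == "SquaredEuclidean"
assert cos_cfg.distance_type() == "InnerProduct"  # COSINE and IP share InnerProduct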