vectordb-bench 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/clients/__init__.py +22 -0
- vectordb_bench/backend/clients/api.py +21 -1
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +47 -6
- vectordb_bench/backend/clients/aws_opensearch/config.py +12 -6
- vectordb_bench/backend/clients/aws_opensearch/run.py +34 -3
- vectordb_bench/backend/clients/memorydb/cli.py +88 -0
- vectordb_bench/backend/clients/memorydb/config.py +54 -0
- vectordb_bench/backend/clients/memorydb/memorydb.py +254 -0
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +154 -0
- vectordb_bench/backend/clients/pgvecto_rs/config.py +108 -73
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +159 -59
- vectordb_bench/backend/clients/pgvector/cli.py +17 -2
- vectordb_bench/backend/clients/pgvector/config.py +20 -5
- vectordb_bench/backend/clients/pgvector/pgvector.py +95 -25
- vectordb_bench/backend/clients/pgvectorscale/cli.py +108 -0
- vectordb_bench/backend/clients/pgvectorscale/config.py +111 -0
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +290 -0
- vectordb_bench/backend/clients/pinecone/config.py +0 -2
- vectordb_bench/backend/clients/pinecone/pinecone.py +34 -36
- vectordb_bench/backend/clients/redis/cli.py +8 -0
- vectordb_bench/backend/clients/redis/config.py +37 -6
- vectordb_bench/backend/runner/mp_runner.py +2 -1
- vectordb_bench/cli/cli.py +137 -0
- vectordb_bench/cli/vectordbbench.py +7 -1
- vectordb_bench/frontend/components/check_results/charts.py +9 -6
- vectordb_bench/frontend/components/check_results/data.py +13 -6
- vectordb_bench/frontend/components/concurrent/charts.py +3 -6
- vectordb_bench/frontend/components/run_test/caseSelector.py +10 -0
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +37 -15
- vectordb_bench/frontend/components/run_test/initStyle.py +3 -1
- vectordb_bench/frontend/config/dbCaseConfigs.py +230 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +13 -5
- vectordb_bench/frontend/vdb_benchmark.py +11 -3
- vectordb_bench/models.py +25 -9
- vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +53 -1
- vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +48 -0
- vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +29 -1
- vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +24 -0
- vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +98 -49
- vectordb_bench/results/getLeaderboardData.py +17 -7
- vectordb_bench/results/leaderboard.json +1 -1
- {vectordb_bench-0.0.12.dist-info → vectordb_bench-0.0.14.dist-info}/METADATA +64 -31
- {vectordb_bench-0.0.12.dist-info → vectordb_bench-0.0.14.dist-info}/RECORD +47 -40
- {vectordb_bench-0.0.12.dist-info → vectordb_bench-0.0.14.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.12.dist-info → vectordb_bench-0.0.14.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.12.dist-info → vectordb_bench-0.0.14.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.12.dist-info → vectordb_bench-0.0.14.dist-info}/top_level.txt +0 -0
@@ -30,7 +30,9 @@ class DB(Enum):
|
|
30
30
|
WeaviateCloud = "WeaviateCloud"
|
31
31
|
PgVector = "PgVector"
|
32
32
|
PgVectoRS = "PgVectoRS"
|
33
|
+
PgVectorScale = "PgVectorScale"
|
33
34
|
Redis = "Redis"
|
35
|
+
MemoryDB = "MemoryDB"
|
34
36
|
Chroma = "Chroma"
|
35
37
|
AWSOpenSearch = "OpenSearch"
|
36
38
|
Test = "test"
|
@@ -70,10 +72,18 @@ class DB(Enum):
|
|
70
72
|
if self == DB.PgVectoRS:
|
71
73
|
from .pgvecto_rs.pgvecto_rs import PgVectoRS
|
72
74
|
return PgVectoRS
|
75
|
+
|
76
|
+
if self == DB.PgVectorScale:
|
77
|
+
from .pgvectorscale.pgvectorscale import PgVectorScale
|
78
|
+
return PgVectorScale
|
73
79
|
|
74
80
|
if self == DB.Redis:
|
75
81
|
from .redis.redis import Redis
|
76
82
|
return Redis
|
83
|
+
|
84
|
+
if self == DB.MemoryDB:
|
85
|
+
from .memorydb.memorydb import MemoryDB
|
86
|
+
return MemoryDB
|
77
87
|
|
78
88
|
if self == DB.Chroma:
|
79
89
|
from .chroma.chroma import ChromaClient
|
@@ -118,9 +128,17 @@ class DB(Enum):
|
|
118
128
|
from .pgvecto_rs.config import PgVectoRSConfig
|
119
129
|
return PgVectoRSConfig
|
120
130
|
|
131
|
+
if self == DB.PgVectorScale:
|
132
|
+
from .pgvectorscale.config import PgVectorScaleConfig
|
133
|
+
return PgVectorScaleConfig
|
134
|
+
|
121
135
|
if self == DB.Redis:
|
122
136
|
from .redis.config import RedisConfig
|
123
137
|
return RedisConfig
|
138
|
+
|
139
|
+
if self == DB.MemoryDB:
|
140
|
+
from .memorydb.config import MemoryDBConfig
|
141
|
+
return MemoryDBConfig
|
124
142
|
|
125
143
|
if self == DB.Chroma:
|
126
144
|
from .chroma.config import ChromaConfig
|
@@ -163,6 +181,10 @@ class DB(Enum):
|
|
163
181
|
from .aws_opensearch.config import AWSOpenSearchIndexConfig
|
164
182
|
return AWSOpenSearchIndexConfig
|
165
183
|
|
184
|
+
if self == DB.PgVectorScale:
|
185
|
+
from .pgvectorscale.config import _pgvectorscale_case_config
|
186
|
+
return _pgvectorscale_case_config.get(index_type)
|
187
|
+
|
166
188
|
# DB.Pinecone, DB.Chroma, DB.Redis
|
167
189
|
return EmptyDBCaseConfig
|
168
190
|
|
@@ -15,6 +15,7 @@ class MetricType(str, Enum):
|
|
15
15
|
class IndexType(str, Enum):
|
16
16
|
HNSW = "HNSW"
|
17
17
|
DISKANN = "DISKANN"
|
18
|
+
STREAMING_DISKANN = "DISKANN"
|
18
19
|
IVFFlat = "IVF_FLAT"
|
19
20
|
IVFSQ8 = "IVF_SQ8"
|
20
21
|
Flat = "FLAT"
|
@@ -38,6 +39,22 @@ class DBConfig(ABC, BaseModel):
|
|
38
39
|
"""
|
39
40
|
|
40
41
|
db_label: str = ""
|
42
|
+
version: str = ""
|
43
|
+
note: str = ""
|
44
|
+
|
45
|
+
@staticmethod
|
46
|
+
def common_short_configs() -> list[str]:
|
47
|
+
"""
|
48
|
+
short input, such as `db_label`, `version`
|
49
|
+
"""
|
50
|
+
return ["version", "db_label"]
|
51
|
+
|
52
|
+
@staticmethod
|
53
|
+
def common_long_configs() -> list[str]:
|
54
|
+
"""
|
55
|
+
long input, such as `note`
|
56
|
+
"""
|
57
|
+
return ["note"]
|
41
58
|
|
42
59
|
@abstractmethod
|
43
60
|
def to_dict(self) -> dict:
|
@@ -45,7 +62,10 @@ class DBConfig(ABC, BaseModel):
|
|
45
62
|
|
46
63
|
@validator("*")
|
47
64
|
def not_empty_field(cls, v, field):
|
48
|
-
if
|
65
|
+
if (
|
66
|
+
field.name in cls.common_short_configs()
|
67
|
+
or field.name in cls.common_long_configs()
|
68
|
+
):
|
49
69
|
return v
|
50
70
|
if not v and isinstance(v, (str, SecretStr)):
|
51
71
|
raise ValueError("Empty string!")
|
@@ -3,7 +3,7 @@ from contextlib import contextmanager
|
|
3
3
|
import time
|
4
4
|
from typing import Iterable, Type
|
5
5
|
from ..api import VectorDB, DBCaseConfig, DBConfig, IndexType
|
6
|
-
from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig
|
6
|
+
from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig, AWSOS_Engine
|
7
7
|
from opensearchpy import OpenSearch
|
8
8
|
from opensearchpy.helpers import bulk
|
9
9
|
|
@@ -83,7 +83,7 @@ class AWSOpenSearch(VectorDB):
|
|
83
83
|
|
84
84
|
@contextmanager
|
85
85
|
def init(self) -> None:
|
86
|
-
"""connect to
|
86
|
+
"""connect to opensearch"""
|
87
87
|
self.client = OpenSearch(**self.db_config)
|
88
88
|
|
89
89
|
yield
|
@@ -97,7 +97,7 @@ class AWSOpenSearch(VectorDB):
|
|
97
97
|
metadata: list[int],
|
98
98
|
**kwargs,
|
99
99
|
) -> tuple[int, Exception]:
|
100
|
-
"""Insert the embeddings to the
|
100
|
+
"""Insert the embeddings to the opensearch."""
|
101
101
|
assert self.client is not None, "should self.init() first"
|
102
102
|
|
103
103
|
insert_data = []
|
@@ -136,13 +136,15 @@ class AWSOpenSearch(VectorDB):
|
|
136
136
|
body = {
|
137
137
|
"size": k,
|
138
138
|
"query": {"knn": {self.vector_col_name: {"vector": query, "k": k}}},
|
139
|
+
**({"filter": {"range": {self.id_col_name: {"gt": filters["id"]}}}} if filters else {})
|
139
140
|
}
|
140
141
|
try:
|
141
|
-
resp = self.client.search(index=self.index_name, body=body)
|
142
|
+
resp = self.client.search(index=self.index_name, body=body,size=k,_source=False,docvalue_fields=[self.id_col_name],stored_fields="_none_",filter_path=[f"hits.hits.fields.{self.id_col_name}"],)
|
142
143
|
log.info(f'Search took: {resp["took"]}')
|
143
144
|
log.info(f'Search shards: {resp["_shards"]}')
|
144
145
|
log.info(f'Search hits total: {resp["hits"]["total"]}')
|
145
|
-
result = [
|
146
|
+
result = [h["fields"][self.id_col_name][0] for h in resp["hits"]["hits"]]
|
147
|
+
#result = [int(d["_id"]) for d in resp["hits"]["hits"]]
|
146
148
|
# log.info(f'success! length={len(res)}')
|
147
149
|
|
148
150
|
return result
|
@@ -152,7 +154,46 @@ class AWSOpenSearch(VectorDB):
|
|
152
154
|
|
153
155
|
def optimize(self):
|
154
156
|
"""optimize will be called between insertion and search in performance cases."""
|
155
|
-
|
157
|
+
# Call refresh first to ensure that all segments are created
|
158
|
+
self._refresh_index()
|
159
|
+
self._do_force_merge()
|
160
|
+
# Call refresh again to ensure that the index is ready after force merge.
|
161
|
+
self._refresh_index()
|
162
|
+
# ensure that all graphs are loaded in memory and ready for search
|
163
|
+
self._load_graphs_to_memory()
|
164
|
+
|
165
|
+
def _refresh_index(self):
|
166
|
+
log.debug(f"Starting refresh for index {self.index_name}")
|
167
|
+
SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC = 30
|
168
|
+
while True:
|
169
|
+
try:
|
170
|
+
log.info(f"Starting the Refresh Index..")
|
171
|
+
self.client.indices.refresh(index=self.index_name)
|
172
|
+
break
|
173
|
+
except Exception as e:
|
174
|
+
log.info(
|
175
|
+
f"Refresh errored out. Sleeping for {SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC} sec and then Retrying : {e}")
|
176
|
+
time.sleep(SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC)
|
177
|
+
continue
|
178
|
+
log.debug(f"Completed refresh for index {self.index_name}")
|
179
|
+
|
180
|
+
def _do_force_merge(self):
|
181
|
+
log.debug(f"Starting force merge for index {self.index_name}")
|
182
|
+
force_merge_endpoint = f'/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false'
|
183
|
+
force_merge_task_id = self.client.transport.perform_request('POST', force_merge_endpoint)['task']
|
184
|
+
SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC = 30
|
185
|
+
while True:
|
186
|
+
time.sleep(SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC)
|
187
|
+
task_status = self.client.tasks.get(task_id=force_merge_task_id)
|
188
|
+
if task_status['completed']:
|
189
|
+
break
|
190
|
+
log.debug(f"Completed force merge for index {self.index_name}")
|
191
|
+
|
192
|
+
def _load_graphs_to_memory(self):
|
193
|
+
if self.case_config.engine != AWSOS_Engine.lucene:
|
194
|
+
log.info("Calling warmup API to load graphs into memory")
|
195
|
+
warmup_endpoint = f'/_plugins/_knn/warmup/{self.index_name}'
|
196
|
+
self.client.transport.perform_request('GET', warmup_endpoint)
|
156
197
|
|
157
198
|
def ready_to_load(self):
|
158
199
|
"""ready_to_load will be called before load in load cases."""
|
@@ -1,9 +1,10 @@
|
|
1
|
+
import logging
|
1
2
|
from enum import Enum
|
2
3
|
from pydantic import SecretStr, BaseModel
|
3
4
|
|
4
5
|
from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
|
5
6
|
|
6
|
-
|
7
|
+
log = logging.getLogger(__name__)
|
7
8
|
class AWSOpenSearchConfig(DBConfig, BaseModel):
|
8
9
|
host: str = ""
|
9
10
|
port: int = 443
|
@@ -31,14 +32,18 @@ class AWSOS_Engine(Enum):
|
|
31
32
|
|
32
33
|
class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
|
33
34
|
metric_type: MetricType = MetricType.L2
|
34
|
-
engine: AWSOS_Engine = AWSOS_Engine.
|
35
|
-
efConstruction: int =
|
36
|
-
|
35
|
+
engine: AWSOS_Engine = AWSOS_Engine.faiss
|
36
|
+
efConstruction: int = 256
|
37
|
+
efSearch: int = 256
|
38
|
+
M: int = 16
|
37
39
|
|
38
40
|
def parse_metric(self) -> str:
|
39
41
|
if self.metric_type == MetricType.IP:
|
40
|
-
return "innerproduct"
|
42
|
+
return "innerproduct"
|
41
43
|
elif self.metric_type == MetricType.COSINE:
|
44
|
+
if self.engine == AWSOS_Engine.faiss:
|
45
|
+
log.info(f"Using metric type as innerproduct because faiss doesn't support cosine as metric type for Opensearch")
|
46
|
+
return "innerproduct"
|
42
47
|
return "cosinesimil"
|
43
48
|
return "l2"
|
44
49
|
|
@@ -49,7 +54,8 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
|
|
49
54
|
"engine": self.engine.value,
|
50
55
|
"parameters": {
|
51
56
|
"ef_construction": self.efConstruction,
|
52
|
-
"m": self.M
|
57
|
+
"m": self.M,
|
58
|
+
"ef_search": self.efSearch
|
53
59
|
}
|
54
60
|
}
|
55
61
|
return params
|
@@ -40,12 +40,12 @@ def create_index(client, index_name):
|
|
40
40
|
"type": "knn_vector",
|
41
41
|
"dimension": _DIM,
|
42
42
|
"method": {
|
43
|
-
"engine": "
|
43
|
+
"engine": "faiss",
|
44
44
|
"name": "hnsw",
|
45
45
|
"space_type": "l2",
|
46
46
|
"parameters": {
|
47
|
-
"ef_construction":
|
48
|
-
"m":
|
47
|
+
"ef_construction": 256,
|
48
|
+
"m": 16,
|
49
49
|
}
|
50
50
|
}
|
51
51
|
}
|
@@ -108,12 +108,43 @@ def search(client, index_name):
|
|
108
108
|
print('\nSearch not ready, sleep 1s')
|
109
109
|
time.sleep(1)
|
110
110
|
|
111
|
+
def optimize_index(client, index_name):
|
112
|
+
print(f"Starting force merge for index {index_name}")
|
113
|
+
force_merge_endpoint = f'/{index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false'
|
114
|
+
force_merge_task_id = client.transport.perform_request('POST', force_merge_endpoint)['task']
|
115
|
+
SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC = 30
|
116
|
+
while True:
|
117
|
+
time.sleep(SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC)
|
118
|
+
task_status = client.tasks.get(task_id=force_merge_task_id)
|
119
|
+
if task_status['completed']:
|
120
|
+
break
|
121
|
+
print(f"Completed force merge for index {index_name}")
|
122
|
+
|
123
|
+
|
124
|
+
def refresh_index(client, index_name):
|
125
|
+
print(f"Starting refresh for index {index_name}")
|
126
|
+
SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC = 30
|
127
|
+
while True:
|
128
|
+
try:
|
129
|
+
print(f"Starting the Refresh Index..")
|
130
|
+
client.indices.refresh(index=index_name)
|
131
|
+
break
|
132
|
+
except Exception as e:
|
133
|
+
print(
|
134
|
+
f"Refresh errored out. Sleeping for {SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC} sec and then Retrying : {e}")
|
135
|
+
time.sleep(SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC)
|
136
|
+
continue
|
137
|
+
print(f"Completed refresh for index {index_name}")
|
138
|
+
|
139
|
+
|
111
140
|
|
112
141
|
def main():
|
113
142
|
client = create_client()
|
114
143
|
try:
|
115
144
|
create_index(client, _INDEX_NAME)
|
116
145
|
bulk_insert(client, _INDEX_NAME)
|
146
|
+
optimize_index(client, _INDEX_NAME)
|
147
|
+
refresh_index(client, _INDEX_NAME)
|
117
148
|
search(client, _INDEX_NAME)
|
118
149
|
delete_index(client, _INDEX_NAME)
|
119
150
|
except Exception as e:
|
@@ -0,0 +1,88 @@
|
|
1
|
+
from typing import Annotated, TypedDict, Unpack
|
2
|
+
|
3
|
+
import click
|
4
|
+
from pydantic import SecretStr
|
5
|
+
|
6
|
+
from ....cli.cli import (
|
7
|
+
CommonTypedDict,
|
8
|
+
HNSWFlavor2,
|
9
|
+
cli,
|
10
|
+
click_parameter_decorators_from_typed_dict,
|
11
|
+
run,
|
12
|
+
)
|
13
|
+
from .. import DB
|
14
|
+
|
15
|
+
|
16
|
+
class MemoryDBTypedDict(TypedDict):
|
17
|
+
host: Annotated[
|
18
|
+
str, click.option("--host", type=str, help="Db host", required=True)
|
19
|
+
]
|
20
|
+
password: Annotated[str, click.option("--password", type=str, help="Db password")]
|
21
|
+
port: Annotated[int, click.option("--port", type=int, default=6379, help="Db Port")]
|
22
|
+
ssl: Annotated[
|
23
|
+
bool,
|
24
|
+
click.option(
|
25
|
+
"--ssl/--no-ssl",
|
26
|
+
is_flag=True,
|
27
|
+
show_default=True,
|
28
|
+
default=True,
|
29
|
+
help="Enable or disable SSL for MemoryDB",
|
30
|
+
),
|
31
|
+
]
|
32
|
+
ssl_ca_certs: Annotated[
|
33
|
+
str,
|
34
|
+
click.option(
|
35
|
+
"--ssl-ca-certs",
|
36
|
+
show_default=True,
|
37
|
+
help="Path to certificate authority file to use for SSL",
|
38
|
+
),
|
39
|
+
]
|
40
|
+
cmd: Annotated[
|
41
|
+
bool,
|
42
|
+
click.option(
|
43
|
+
"--cmd",
|
44
|
+
is_flag=True,
|
45
|
+
show_default=True,
|
46
|
+
default=False,
|
47
|
+
help="Cluster Mode Disabled (CMD), use this flag when testing locally on a single node instance. In production, MemoryDB only supports cluster mode (CME)",
|
48
|
+
),
|
49
|
+
]
|
50
|
+
insert_batch_size: Annotated[
|
51
|
+
int,
|
52
|
+
click.option(
|
53
|
+
"--insert-batch-size",
|
54
|
+
type=int,
|
55
|
+
default=10,
|
56
|
+
help="Batch size for inserting data. Adjust this as needed, but don't make it too big",
|
57
|
+
),
|
58
|
+
]
|
59
|
+
|
60
|
+
|
61
|
+
class MemoryDBHNSWTypedDict(CommonTypedDict, MemoryDBTypedDict, HNSWFlavor2):
|
62
|
+
...
|
63
|
+
|
64
|
+
|
65
|
+
@cli.command()
|
66
|
+
@click_parameter_decorators_from_typed_dict(MemoryDBHNSWTypedDict)
|
67
|
+
def MemoryDB(**parameters: Unpack[MemoryDBHNSWTypedDict]):
|
68
|
+
from .config import MemoryDBConfig, MemoryDBHNSWConfig
|
69
|
+
|
70
|
+
run(
|
71
|
+
db=DB.MemoryDB,
|
72
|
+
db_config=MemoryDBConfig(
|
73
|
+
db_label=parameters["db_label"],
|
74
|
+
password=SecretStr(parameters["password"]) if parameters["password"] else None,
|
75
|
+
host=SecretStr(parameters["host"]),
|
76
|
+
port=parameters["port"],
|
77
|
+
ssl=parameters["ssl"],
|
78
|
+
ssl_ca_certs=parameters["ssl_ca_certs"],
|
79
|
+
cmd=parameters["cmd"],
|
80
|
+
),
|
81
|
+
db_case_config=MemoryDBHNSWConfig(
|
82
|
+
M=parameters["m"],
|
83
|
+
ef_construction=parameters["ef_construction"],
|
84
|
+
ef_runtime=parameters["ef_runtime"],
|
85
|
+
insert_batch_size=parameters["insert_batch_size"]
|
86
|
+
),
|
87
|
+
**parameters,
|
88
|
+
)
|
@@ -0,0 +1,54 @@
|
|
1
|
+
from pydantic import BaseModel, SecretStr
|
2
|
+
|
3
|
+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
4
|
+
|
5
|
+
|
6
|
+
class MemoryDBConfig(DBConfig):
|
7
|
+
host: SecretStr
|
8
|
+
password: SecretStr | None = None
|
9
|
+
port: int | None = None
|
10
|
+
ssl: bool | None = None
|
11
|
+
cmd: bool | None = None
|
12
|
+
ssl_ca_certs: str | None = None
|
13
|
+
|
14
|
+
def to_dict(self) -> dict:
|
15
|
+
return {
|
16
|
+
"host": self.host.get_secret_value(),
|
17
|
+
"port": self.port,
|
18
|
+
"password": self.password.get_secret_value() if self.password else None,
|
19
|
+
"ssl": self.ssl,
|
20
|
+
"cmd": self.cmd,
|
21
|
+
"ssl_ca_certs": self.ssl_ca_certs,
|
22
|
+
}
|
23
|
+
|
24
|
+
|
25
|
+
class MemoryDBIndexConfig(BaseModel, DBCaseConfig):
|
26
|
+
metric_type: MetricType | None = None
|
27
|
+
insert_batch_size: int | None = None
|
28
|
+
|
29
|
+
def parse_metric(self) -> str:
|
30
|
+
if self.metric_type == MetricType.L2:
|
31
|
+
return "l2"
|
32
|
+
elif self.metric_type == MetricType.IP:
|
33
|
+
return "ip"
|
34
|
+
return "cosine"
|
35
|
+
|
36
|
+
|
37
|
+
class MemoryDBHNSWConfig(MemoryDBIndexConfig):
|
38
|
+
M: int | None = 16
|
39
|
+
ef_construction: int | None = 64
|
40
|
+
ef_runtime: int | None = 10
|
41
|
+
index: IndexType = IndexType.HNSW
|
42
|
+
|
43
|
+
def index_param(self) -> dict:
|
44
|
+
return {
|
45
|
+
"metric": self.parse_metric(),
|
46
|
+
"index_type": self.index.value,
|
47
|
+
"m": self.M,
|
48
|
+
"ef_construction": self.ef_construction,
|
49
|
+
}
|
50
|
+
|
51
|
+
def search_param(self) -> dict:
|
52
|
+
return {
|
53
|
+
"ef_runtime": self.ef_runtime,
|
54
|
+
}
|