vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +85 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +13 -24
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +39 -40
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +19 -39
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +95 -62
- vectordb_bench/backend/clients/test/cli.py +2 -3
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +5 -9
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +18 -14
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +56 -23
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +46 -22
- vectordb_bench/backend/runner/serial_runner.py +81 -46
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -92
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +45 -24
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.21.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
@@ -1,37 +1,40 @@
|
|
1
1
|
import logging
|
2
2
|
from contextlib import contextmanager
|
3
|
-
from typing import Any
|
4
|
-
|
5
|
-
|
3
|
+
from typing import Any
|
4
|
+
|
5
|
+
import numpy as np
|
6
6
|
import redis
|
7
|
-
from redis.commands.search.field import TagField, VectorField
|
7
|
+
from redis.commands.search.field import NumericField, TagField, VectorField
|
8
8
|
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
|
9
9
|
from redis.commands.search.query import Query
|
10
|
-
import numpy as np
|
11
10
|
|
11
|
+
from ..api import DBCaseConfig, VectorDB
|
12
12
|
|
13
13
|
log = logging.getLogger(__name__)
|
14
|
-
INDEX_NAME = "index"
|
14
|
+
INDEX_NAME = "index" # Vector Index Name
|
15
|
+
|
15
16
|
|
16
17
|
class Redis(VectorDB):
|
17
18
|
def __init__(
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
):
|
26
|
-
|
19
|
+
self,
|
20
|
+
dim: int,
|
21
|
+
db_config: dict,
|
22
|
+
db_case_config: DBCaseConfig,
|
23
|
+
drop_old: bool = False,
|
24
|
+
**kwargs,
|
25
|
+
):
|
27
26
|
self.db_config = db_config
|
28
27
|
self.case_config = db_case_config
|
29
28
|
self.collection_name = INDEX_NAME
|
30
29
|
|
31
30
|
# Create a redis connection, if db has password configured, add it to the connection here and in init():
|
32
|
-
password=self.db_config["password"]
|
33
|
-
conn = redis.Redis(
|
34
|
-
|
31
|
+
password = self.db_config["password"]
|
32
|
+
conn = redis.Redis(
|
33
|
+
host=self.db_config["host"],
|
34
|
+
port=self.db_config["port"],
|
35
|
+
password=password,
|
36
|
+
db=0,
|
37
|
+
)
|
35
38
|
|
36
39
|
if drop_old:
|
37
40
|
try:
|
@@ -40,7 +43,7 @@ class Redis(VectorDB):
|
|
40
43
|
except redis.exceptions.ResponseError:
|
41
44
|
drop_old = False
|
42
45
|
log.info(f"Redis client drop_old collection: {self.collection_name}")
|
43
|
-
|
46
|
+
|
44
47
|
self.make_index(dim, conn)
|
45
48
|
conn.close()
|
46
49
|
conn = None
|
@@ -49,16 +52,20 @@ class Redis(VectorDB):
|
|
49
52
|
try:
|
50
53
|
# check to see if index exists
|
51
54
|
conn.ft(INDEX_NAME).info()
|
52
|
-
except:
|
55
|
+
except Exception:
|
53
56
|
schema = (
|
54
|
-
TagField("id"),
|
55
|
-
NumericField("metadata"),
|
56
|
-
VectorField(
|
57
|
-
"
|
58
|
-
|
59
|
-
|
60
|
-
"
|
61
|
-
|
57
|
+
TagField("id"),
|
58
|
+
NumericField("metadata"),
|
59
|
+
VectorField(
|
60
|
+
"vector", # Vector Field Name
|
61
|
+
"HNSW", # Vector Index Type: FLAT or HNSW
|
62
|
+
{
|
63
|
+
"TYPE": "FLOAT32", # FLOAT32 or FLOAT64
|
64
|
+
"DIM": vector_dimensions, # Number of Vector Dimensions
|
65
|
+
"DISTANCE_METRIC": "COSINE", # Vector Search Distance Metric
|
66
|
+
"M": self.case_config.index_param()["params"]["M"],
|
67
|
+
"EF_CONSTRUCTION": self.case_config.index_param()["params"]["efConstruction"],
|
68
|
+
},
|
62
69
|
),
|
63
70
|
)
|
64
71
|
|
@@ -69,61 +76,62 @@ class Redis(VectorDB):
|
|
69
76
|
|
70
77
|
@contextmanager
|
71
78
|
def init(self) -> None:
|
72
|
-
"""
|
79
|
+
"""create and destory connections to database.
|
73
80
|
|
74
81
|
Examples:
|
75
82
|
>>> with self.init():
|
76
83
|
>>> self.insert_embeddings()
|
77
84
|
"""
|
78
|
-
self.conn = redis.Redis(
|
85
|
+
self.conn = redis.Redis(
|
86
|
+
host=self.db_config["host"],
|
87
|
+
port=self.db_config["port"],
|
88
|
+
password=self.db_config["password"],
|
89
|
+
db=0,
|
90
|
+
)
|
79
91
|
yield
|
80
92
|
self.conn.close()
|
81
93
|
self.conn = None
|
82
94
|
|
83
|
-
|
84
95
|
def ready_to_search(self) -> bool:
|
85
96
|
"""Check if the database is ready to search."""
|
86
|
-
pass
|
87
|
-
|
88
|
-
|
89
|
-
def ready_to_load(self) -> bool:
|
90
|
-
pass
|
91
97
|
|
92
|
-
def optimize(self
|
98
|
+
def optimize(self, data_size: int | None = None):
|
93
99
|
pass
|
94
100
|
|
95
|
-
|
96
101
|
def insert_embeddings(
|
97
102
|
self,
|
98
103
|
embeddings: list[list[float]],
|
99
104
|
metadata: list[int],
|
100
105
|
**kwargs: Any,
|
101
|
-
) ->
|
106
|
+
) -> tuple[int, Exception]:
|
102
107
|
"""Insert embeddings into the database.
|
103
108
|
Should call self.init() first.
|
104
109
|
"""
|
105
110
|
|
106
|
-
batch_size = 1000
|
111
|
+
batch_size = 1000 # Adjust this as needed, but don't make too big
|
107
112
|
try:
|
108
113
|
with self.conn.pipeline(transaction=False) as pipe:
|
109
114
|
for i, embedding in enumerate(embeddings):
|
110
|
-
|
111
|
-
pipe.hset(
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
115
|
+
ndarr_emb = np.array(embedding).astype(np.float32)
|
116
|
+
pipe.hset(
|
117
|
+
metadata[i],
|
118
|
+
mapping={
|
119
|
+
"id": str(metadata[i]),
|
120
|
+
"metadata": metadata[i],
|
121
|
+
"vector": ndarr_emb.tobytes(),
|
122
|
+
},
|
123
|
+
)
|
116
124
|
# Execute the pipe so we don't keep too much in memory at once
|
117
125
|
if i % batch_size == 0:
|
118
|
-
|
126
|
+
_ = pipe.execute()
|
119
127
|
|
120
|
-
|
128
|
+
_ = pipe.execute()
|
121
129
|
result_len = i + 1
|
122
130
|
except Exception as e:
|
123
131
|
return 0, e
|
124
|
-
|
132
|
+
|
125
133
|
return result_len, None
|
126
|
-
|
134
|
+
|
127
135
|
def search_embedding(
|
128
136
|
self,
|
129
137
|
query: list[float],
|
@@ -131,28 +139,53 @@ class Redis(VectorDB):
|
|
131
139
|
filters: dict | None = None,
|
132
140
|
timeout: int | None = None,
|
133
141
|
**kwargs: Any,
|
134
|
-
) ->
|
142
|
+
) -> list[int]:
|
135
143
|
assert self.conn is not None
|
136
|
-
|
144
|
+
|
137
145
|
query_vector = np.array(query).astype(np.float32).tobytes()
|
138
|
-
|
146
|
+
ef_runtime = self.case_config.search_param()["params"]["ef"]
|
147
|
+
query_obj = (
|
148
|
+
Query(f"*=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
|
149
|
+
.sort_by("score")
|
150
|
+
.return_fields("id", "score")
|
151
|
+
.paging(0, k)
|
152
|
+
.dialect(2)
|
153
|
+
)
|
139
154
|
query_params = {"vec": query_vector}
|
140
|
-
|
155
|
+
|
141
156
|
if filters:
|
142
157
|
# benchmark test filters of format: {'metadata': '>=10000', 'id': 10000}
|
143
158
|
# gets exact match for id, and range for metadata if they exist in filters
|
144
159
|
id_value = filters.get("id")
|
145
160
|
metadata_value = filters.get("metadata")
|
146
161
|
if id_value and metadata_value:
|
147
|
-
query_obj =
|
162
|
+
query_obj = (
|
163
|
+
Query(
|
164
|
+
f"(@metadata:[{metadata_value} +inf] @id:{ {id_value} })=>[KNN {k} ",
|
165
|
+
f"@vector $vec EF_RUNTIME {ef_runtime} as score]",
|
166
|
+
)
|
167
|
+
.sort_by("score")
|
168
|
+
.return_fields("id", "score")
|
169
|
+
.paging(0, k)
|
170
|
+
.dialect(2)
|
171
|
+
)
|
148
172
|
elif id_value:
|
149
|
-
#gets exact match for id
|
150
|
-
query_obj =
|
151
|
-
|
152
|
-
|
173
|
+
# gets exact match for id
|
174
|
+
query_obj = (
|
175
|
+
Query(f"@id:{ {id_value} }=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
|
176
|
+
.sort_by("score")
|
177
|
+
.return_fields("id", "score")
|
178
|
+
.paging(0, k)
|
179
|
+
.dialect(2)
|
180
|
+
)
|
181
|
+
else: # metadata only case, greater than or equal to metadata value
|
182
|
+
query_obj = (
|
183
|
+
Query(f"@metadata:[{metadata_value} +inf]=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
|
184
|
+
.sort_by("score")
|
185
|
+
.return_fields("id", "score")
|
186
|
+
.paging(0, k)
|
187
|
+
.dialect(2)
|
188
|
+
)
|
153
189
|
res = self.conn.ft(INDEX_NAME).search(query_obj, query_params)
|
154
190
|
# doc in res of format {'id': '9831', 'payload': None, 'score': '1.19209289551e-07'}
|
155
191
|
return [int(doc["id"]) for doc in res.docs]
|
156
|
-
|
157
|
-
|
158
|
-
|
@@ -10,15 +10,14 @@ from .. import DB
|
|
10
10
|
from ..test.config import TestConfig, TestIndexConfig
|
11
11
|
|
12
12
|
|
13
|
-
class TestTypedDict(CommonTypedDict):
|
14
|
-
...
|
13
|
+
class TestTypedDict(CommonTypedDict): ...
|
15
14
|
|
16
15
|
|
17
16
|
@cli.command()
|
18
17
|
@click_parameter_decorators_from_typed_dict(TestTypedDict)
|
19
18
|
def Test(**parameters: Unpack[TestTypedDict]):
|
20
19
|
run(
|
21
|
-
db=DB.
|
20
|
+
db=DB.Test,
|
22
21
|
db_config=TestConfig(db_label=parameters["db_label"]),
|
23
22
|
db_case_config=TestIndexConfig(),
|
24
23
|
**parameters,
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import logging
|
2
|
+
from collections.abc import Generator
|
2
3
|
from contextlib import contextmanager
|
3
|
-
from typing import Any
|
4
|
+
from typing import Any
|
4
5
|
|
5
6
|
from ..api import DBCaseConfig, VectorDB
|
6
7
|
|
@@ -32,10 +33,7 @@ class Test(VectorDB):
|
|
32
33
|
|
33
34
|
yield
|
34
35
|
|
35
|
-
def
|
36
|
-
return True
|
37
|
-
|
38
|
-
def optimize(self) -> None:
|
36
|
+
def optimize(self, data_size: int | None = None):
|
39
37
|
pass
|
40
38
|
|
41
39
|
def insert_embeddings(
|
@@ -43,11 +41,10 @@ class Test(VectorDB):
|
|
43
41
|
embeddings: list[list[float]],
|
44
42
|
metadata: list[int],
|
45
43
|
**kwargs: Any,
|
46
|
-
) ->
|
44
|
+
) -> tuple[int, Exception | None]:
|
47
45
|
"""Insert embeddings into the database.
|
48
46
|
Should call self.init() first.
|
49
47
|
"""
|
50
|
-
raise RuntimeError("Not implemented")
|
51
48
|
return len(metadata), None
|
52
49
|
|
53
50
|
def search_embedding(
|
@@ -58,5 +55,4 @@ class Test(VectorDB):
|
|
58
55
|
timeout: int | None = None,
|
59
56
|
**kwargs: Any,
|
60
57
|
) -> list[int]:
|
61
|
-
|
62
|
-
return [i for i in range(k)]
|
58
|
+
return list(range(k))
|
@@ -14,7 +14,8 @@ from .. import DB
|
|
14
14
|
|
15
15
|
class WeaviateTypedDict(CommonTypedDict):
|
16
16
|
api_key: Annotated[
|
17
|
-
str,
|
17
|
+
str,
|
18
|
+
click.option("--api-key", type=str, help="Weaviate api key", required=True),
|
18
19
|
]
|
19
20
|
url: Annotated[
|
20
21
|
str,
|
@@ -34,8 +35,6 @@ def Weaviate(**parameters: Unpack[WeaviateTypedDict]):
|
|
34
35
|
api_key=SecretStr(parameters["api_key"]),
|
35
36
|
url=SecretStr(parameters["url"]),
|
36
37
|
),
|
37
|
-
db_case_config=WeaviateIndexConfig(
|
38
|
-
ef=256, efConstruction=256, maxConnections=16
|
39
|
-
),
|
38
|
+
db_case_config=WeaviateIndexConfig(ef=256, efConstruction=256, maxConnections=16),
|
40
39
|
**parameters,
|
41
40
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
from pydantic import BaseModel, SecretStr
|
2
2
|
|
3
|
-
from ..api import
|
3
|
+
from ..api import DBCaseConfig, DBConfig, MetricType
|
4
4
|
|
5
5
|
|
6
6
|
class WeaviateConfig(DBConfig):
|
@@ -23,7 +23,7 @@ class WeaviateIndexConfig(BaseModel, DBCaseConfig):
|
|
23
23
|
def parse_metric(self) -> str:
|
24
24
|
if self.metric_type == MetricType.L2:
|
25
25
|
return "l2-squared"
|
26
|
-
|
26
|
+
if self.metric_type == MetricType.IP:
|
27
27
|
return "dot"
|
28
28
|
return "cosine"
|
29
29
|
|
@@ -1,13 +1,13 @@
|
|
1
1
|
"""Wrapper around the Weaviate vector database over VectorDB"""
|
2
2
|
|
3
3
|
import logging
|
4
|
-
from
|
4
|
+
from collections.abc import Iterable
|
5
5
|
from contextlib import contextmanager
|
6
6
|
|
7
7
|
import weaviate
|
8
8
|
from weaviate.exceptions import WeaviateBaseError
|
9
9
|
|
10
|
-
from ..api import
|
10
|
+
from ..api import DBCaseConfig, VectorDB
|
11
11
|
|
12
12
|
log = logging.getLogger(__name__)
|
13
13
|
|
@@ -23,7 +23,13 @@ class WeaviateCloud(VectorDB):
|
|
23
23
|
**kwargs,
|
24
24
|
):
|
25
25
|
"""Initialize wrapper around the weaviate vector database."""
|
26
|
-
db_config.update(
|
26
|
+
db_config.update(
|
27
|
+
{
|
28
|
+
"auth_client_secret": weaviate.AuthApiKey(
|
29
|
+
api_key=db_config.get("auth_client_secret"),
|
30
|
+
),
|
31
|
+
},
|
32
|
+
)
|
27
33
|
self.db_config = db_config
|
28
34
|
self.case_config = db_case_config
|
29
35
|
self.collection_name = collection_name
|
@@ -33,6 +39,7 @@ class WeaviateCloud(VectorDB):
|
|
33
39
|
self._index_name = "vector_idx"
|
34
40
|
|
35
41
|
from weaviate import Client
|
42
|
+
|
36
43
|
client = Client(**db_config)
|
37
44
|
if drop_old:
|
38
45
|
try:
|
@@ -40,7 +47,7 @@ class WeaviateCloud(VectorDB):
|
|
40
47
|
log.info(f"weaviate client drop_old collection: {self.collection_name}")
|
41
48
|
client.schema.delete_class(self.collection_name)
|
42
49
|
except WeaviateBaseError as e:
|
43
|
-
log.warning(f"Failed to drop collection: {self.collection_name} error: {
|
50
|
+
log.warning(f"Failed to drop collection: {self.collection_name} error: {e!s}")
|
44
51
|
raise e from None
|
45
52
|
self._create_collection(client)
|
46
53
|
client = None
|
@@ -54,20 +61,20 @@ class WeaviateCloud(VectorDB):
|
|
54
61
|
>>> self.search_embedding()
|
55
62
|
"""
|
56
63
|
from weaviate import Client
|
64
|
+
|
57
65
|
self.client = Client(**self.db_config)
|
58
66
|
yield
|
59
67
|
self.client = None
|
60
|
-
del
|
61
|
-
|
62
|
-
def ready_to_load(self):
|
63
|
-
"""Should call insert first, do nothing"""
|
64
|
-
pass
|
68
|
+
del self.client
|
65
69
|
|
66
|
-
def optimize(self):
|
70
|
+
def optimize(self, data_size: int | None = None):
|
67
71
|
assert self.client.schema.exists(self.collection_name)
|
68
|
-
self.client.schema.update_config(
|
72
|
+
self.client.schema.update_config(
|
73
|
+
self.collection_name,
|
74
|
+
{"vectorIndexConfig": self.case_config.search_param()},
|
75
|
+
)
|
69
76
|
|
70
|
-
def _create_collection(self, client):
|
77
|
+
def _create_collection(self, client: weaviate.Client) -> None:
|
71
78
|
if not client.schema.exists(self.collection_name):
|
72
79
|
log.info(f"Create collection: {self.collection_name}")
|
73
80
|
class_obj = {
|
@@ -78,13 +85,13 @@ class WeaviateCloud(VectorDB):
|
|
78
85
|
"dataType": ["int"],
|
79
86
|
"name": self._scalar_field,
|
80
87
|
},
|
81
|
-
]
|
88
|
+
],
|
82
89
|
}
|
83
90
|
class_obj["vectorIndexConfig"] = self.case_config.index_param()
|
84
91
|
try:
|
85
92
|
client.schema.create_class(class_obj)
|
86
93
|
except WeaviateBaseError as e:
|
87
|
-
log.warning(f"Failed to create collection: {self.collection_name} error: {
|
94
|
+
log.warning(f"Failed to create collection: {self.collection_name} error: {e!s}")
|
88
95
|
raise e from None
|
89
96
|
|
90
97
|
def insert_embeddings(
|
@@ -102,15 +109,17 @@ class WeaviateCloud(VectorDB):
|
|
102
109
|
batch.dynamic = True
|
103
110
|
res = []
|
104
111
|
for i in range(len(metadata)):
|
105
|
-
res.append(
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
112
|
+
res.append(
|
113
|
+
batch.add_data_object(
|
114
|
+
{self._scalar_field: metadata[i]},
|
115
|
+
class_name=self.collection_name,
|
116
|
+
vector=embeddings[i],
|
117
|
+
),
|
118
|
+
)
|
110
119
|
insert_count += 1
|
111
120
|
return (len(res), None)
|
112
121
|
except WeaviateBaseError as e:
|
113
|
-
log.warning(f"Failed to insert data, error: {
|
122
|
+
log.warning(f"Failed to insert data, error: {e!s}")
|
114
123
|
return (insert_count, e)
|
115
124
|
|
116
125
|
def search_embedding(
|
@@ -125,12 +134,17 @@ class WeaviateCloud(VectorDB):
|
|
125
134
|
"""
|
126
135
|
assert self.client.schema.exists(self.collection_name)
|
127
136
|
|
128
|
-
query_obj =
|
137
|
+
query_obj = (
|
138
|
+
self.client.query.get(self.collection_name, [self._scalar_field])
|
139
|
+
.with_additional("distance")
|
140
|
+
.with_near_vector({"vector": query})
|
141
|
+
.with_limit(k)
|
142
|
+
)
|
129
143
|
if filters:
|
130
144
|
where_filter = {
|
131
145
|
"path": "key",
|
132
146
|
"operator": "GreaterThanEqual",
|
133
|
-
"valueInt": filters.get(
|
147
|
+
"valueInt": filters.get("id"),
|
134
148
|
}
|
135
149
|
query_obj = query_obj.with_where(where_filter)
|
136
150
|
|
@@ -138,7 +152,4 @@ class WeaviateCloud(VectorDB):
|
|
138
152
|
res = query_obj.do()
|
139
153
|
|
140
154
|
# Organize results.
|
141
|
-
|
142
|
-
|
143
|
-
return ret
|
144
|
-
|
155
|
+
return [result[self._scalar_field] for result in res["data"]["Get"][self.collection_name]]
|
@@ -1,33 +1,36 @@
|
|
1
|
+
import os
|
1
2
|
from typing import Annotated, Unpack
|
2
3
|
|
3
4
|
import click
|
4
|
-
import os
|
5
5
|
from pydantic import SecretStr
|
6
6
|
|
7
|
+
from vectordb_bench.backend.clients import DB
|
7
8
|
from vectordb_bench.cli.cli import (
|
8
9
|
CommonTypedDict,
|
9
10
|
cli,
|
10
11
|
click_parameter_decorators_from_typed_dict,
|
11
12
|
run,
|
12
13
|
)
|
13
|
-
from vectordb_bench.backend.clients import DB
|
14
14
|
|
15
15
|
|
16
16
|
class ZillizTypedDict(CommonTypedDict):
|
17
17
|
uri: Annotated[
|
18
|
-
str,
|
18
|
+
str,
|
19
|
+
click.option("--uri", type=str, help="uri connection string", required=True),
|
19
20
|
]
|
20
21
|
user_name: Annotated[
|
21
|
-
str,
|
22
|
+
str,
|
23
|
+
click.option("--user-name", type=str, help="Db username", required=True),
|
22
24
|
]
|
23
25
|
password: Annotated[
|
24
26
|
str,
|
25
|
-
click.option(
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
click.option(
|
28
|
+
"--password",
|
29
|
+
type=str,
|
30
|
+
help="Zilliz password",
|
31
|
+
default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
|
32
|
+
show_default="$ZILLIZ_PASSWORD",
|
33
|
+
),
|
31
34
|
]
|
32
35
|
level: Annotated[
|
33
36
|
str,
|
@@ -38,7 +41,7 @@ class ZillizTypedDict(CommonTypedDict):
|
|
38
41
|
@cli.command()
|
39
42
|
@click_parameter_decorators_from_typed_dict(ZillizTypedDict)
|
40
43
|
def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
|
41
|
-
from .config import
|
44
|
+
from .config import AutoIndexConfig, ZillizCloudConfig
|
42
45
|
|
43
46
|
run(
|
44
47
|
db=DB.ZillizCloud,
|
@@ -1,7 +1,7 @@
|
|
1
1
|
from pydantic import SecretStr
|
2
2
|
|
3
3
|
from ..api import DBCaseConfig, DBConfig
|
4
|
-
from ..milvus.config import
|
4
|
+
from ..milvus.config import IndexType, MilvusIndexConfig
|
5
5
|
|
6
6
|
|
7
7
|
class ZillizCloudConfig(DBConfig):
|
@@ -33,7 +33,5 @@ class AutoIndexConfig(MilvusIndexConfig, DBCaseConfig):
|
|
33
33
|
"metric_type": self.parse_metric(),
|
34
34
|
"params": {
|
35
35
|
"level": self.level,
|
36
|
-
}
|
36
|
+
},
|
37
37
|
}
|
38
|
-
|
39
|
-
|