vectordb-bench 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +56 -46
- vectordb_bench/backend/clients/__init__.py +101 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +26 -0
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +18 -0
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +345 -0
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +47 -0
- vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
- vectordb_bench/backend/clients/alloydb/cli.py +52 -35
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +8 -9
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +38 -36
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
- vectordb_bench/backend/clients/milvus/cli.py +62 -80
- vectordb_bench/backend/clients/milvus/config.py +31 -7
- vectordb_bench/backend/clients/milvus/milvus.py +23 -26
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +94 -58
- vectordb_bench/backend/clients/test/cli.py +1 -2
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +4 -5
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +30 -18
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +85 -34
- vectordb_bench/backend/runner/rate_runner.py +51 -23
- vectordb_bench/backend/runner/read_write_runner.py +140 -46
- vectordb_bench/backend/runner/serial_runner.py +99 -50
- vectordb_bench/backend/runner/util.py +4 -19
- vectordb_bench/backend/task_runner.py +95 -74
- vectordb_bench/backend/utils.py +17 -9
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +108 -83
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +56 -26
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +34 -42
- vectordb_bench-0.0.20.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.18.dist-info/RECORD +0 -131
- {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0
@@ -1,37 +1,40 @@
|
|
1
1
|
import logging
|
2
2
|
from contextlib import contextmanager
|
3
|
-
from typing import Any
|
4
|
-
|
5
|
-
|
3
|
+
from typing import Any
|
4
|
+
|
5
|
+
import numpy as np
|
6
6
|
import redis
|
7
|
-
from redis.commands.search.field import TagField, VectorField
|
7
|
+
from redis.commands.search.field import NumericField, TagField, VectorField
|
8
8
|
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
|
9
9
|
from redis.commands.search.query import Query
|
10
|
-
import numpy as np
|
11
10
|
|
11
|
+
from ..api import DBCaseConfig, VectorDB
|
12
12
|
|
13
13
|
log = logging.getLogger(__name__)
|
14
|
-
INDEX_NAME = "index"
|
14
|
+
INDEX_NAME = "index" # Vector Index Name
|
15
|
+
|
15
16
|
|
16
17
|
class Redis(VectorDB):
|
17
18
|
def __init__(
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
):
|
26
|
-
|
19
|
+
self,
|
20
|
+
dim: int,
|
21
|
+
db_config: dict,
|
22
|
+
db_case_config: DBCaseConfig,
|
23
|
+
drop_old: bool = False,
|
24
|
+
**kwargs,
|
25
|
+
):
|
27
26
|
self.db_config = db_config
|
28
27
|
self.case_config = db_case_config
|
29
28
|
self.collection_name = INDEX_NAME
|
30
29
|
|
31
30
|
# Create a redis connection, if db has password configured, add it to the connection here and in init():
|
32
|
-
password=self.db_config["password"]
|
33
|
-
conn = redis.Redis(
|
34
|
-
|
31
|
+
password = self.db_config["password"]
|
32
|
+
conn = redis.Redis(
|
33
|
+
host=self.db_config["host"],
|
34
|
+
port=self.db_config["port"],
|
35
|
+
password=password,
|
36
|
+
db=0,
|
37
|
+
)
|
35
38
|
|
36
39
|
if drop_old:
|
37
40
|
try:
|
@@ -40,7 +43,7 @@ class Redis(VectorDB):
|
|
40
43
|
except redis.exceptions.ResponseError:
|
41
44
|
drop_old = False
|
42
45
|
log.info(f"Redis client drop_old collection: {self.collection_name}")
|
43
|
-
|
46
|
+
|
44
47
|
self.make_index(dim, conn)
|
45
48
|
conn.close()
|
46
49
|
conn = None
|
@@ -49,16 +52,20 @@ class Redis(VectorDB):
|
|
49
52
|
try:
|
50
53
|
# check to see if index exists
|
51
54
|
conn.ft(INDEX_NAME).info()
|
52
|
-
except:
|
55
|
+
except Exception:
|
53
56
|
schema = (
|
54
|
-
TagField("id"),
|
55
|
-
NumericField("metadata"),
|
56
|
-
VectorField(
|
57
|
-
"
|
58
|
-
|
59
|
-
|
60
|
-
"
|
61
|
-
|
57
|
+
TagField("id"),
|
58
|
+
NumericField("metadata"),
|
59
|
+
VectorField(
|
60
|
+
"vector", # Vector Field Name
|
61
|
+
"HNSW", # Vector Index Type: FLAT or HNSW
|
62
|
+
{
|
63
|
+
"TYPE": "FLOAT32", # FLOAT32 or FLOAT64
|
64
|
+
"DIM": vector_dimensions, # Number of Vector Dimensions
|
65
|
+
"DISTANCE_METRIC": "COSINE", # Vector Search Distance Metric
|
66
|
+
"M": self.case_config.index_param()["params"]["M"],
|
67
|
+
"EF_CONSTRUCTION": self.case_config.index_param()["params"]["efConstruction"],
|
68
|
+
},
|
62
69
|
),
|
63
70
|
)
|
64
71
|
|
@@ -69,22 +76,24 @@ class Redis(VectorDB):
|
|
69
76
|
|
70
77
|
@contextmanager
|
71
78
|
def init(self) -> None:
|
72
|
-
"""
|
79
|
+
"""create and destory connections to database.
|
73
80
|
|
74
81
|
Examples:
|
75
82
|
>>> with self.init():
|
76
83
|
>>> self.insert_embeddings()
|
77
84
|
"""
|
78
|
-
self.conn = redis.Redis(
|
85
|
+
self.conn = redis.Redis(
|
86
|
+
host=self.db_config["host"],
|
87
|
+
port=self.db_config["port"],
|
88
|
+
password=self.db_config["password"],
|
89
|
+
db=0,
|
90
|
+
)
|
79
91
|
yield
|
80
92
|
self.conn.close()
|
81
93
|
self.conn = None
|
82
94
|
|
83
|
-
|
84
95
|
def ready_to_search(self) -> bool:
|
85
96
|
"""Check if the database is ready to search."""
|
86
|
-
pass
|
87
|
-
|
88
97
|
|
89
98
|
def ready_to_load(self) -> bool:
|
90
99
|
pass
|
@@ -92,38 +101,40 @@ class Redis(VectorDB):
|
|
92
101
|
def optimize(self) -> None:
|
93
102
|
pass
|
94
103
|
|
95
|
-
|
96
104
|
def insert_embeddings(
|
97
105
|
self,
|
98
106
|
embeddings: list[list[float]],
|
99
107
|
metadata: list[int],
|
100
108
|
**kwargs: Any,
|
101
|
-
) ->
|
109
|
+
) -> tuple[int, Exception]:
|
102
110
|
"""Insert embeddings into the database.
|
103
111
|
Should call self.init() first.
|
104
112
|
"""
|
105
113
|
|
106
|
-
batch_size = 1000
|
114
|
+
batch_size = 1000 # Adjust this as needed, but don't make too big
|
107
115
|
try:
|
108
116
|
with self.conn.pipeline(transaction=False) as pipe:
|
109
117
|
for i, embedding in enumerate(embeddings):
|
110
|
-
|
111
|
-
pipe.hset(
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
118
|
+
ndarr_emb = np.array(embedding).astype(np.float32)
|
119
|
+
pipe.hset(
|
120
|
+
metadata[i],
|
121
|
+
mapping={
|
122
|
+
"id": str(metadata[i]),
|
123
|
+
"metadata": metadata[i],
|
124
|
+
"vector": ndarr_emb.tobytes(),
|
125
|
+
},
|
126
|
+
)
|
116
127
|
# Execute the pipe so we don't keep too much in memory at once
|
117
128
|
if i % batch_size == 0:
|
118
|
-
|
129
|
+
_ = pipe.execute()
|
119
130
|
|
120
|
-
|
131
|
+
_ = pipe.execute()
|
121
132
|
result_len = i + 1
|
122
133
|
except Exception as e:
|
123
134
|
return 0, e
|
124
|
-
|
135
|
+
|
125
136
|
return result_len, None
|
126
|
-
|
137
|
+
|
127
138
|
def search_embedding(
|
128
139
|
self,
|
129
140
|
query: list[float],
|
@@ -131,28 +142,53 @@ class Redis(VectorDB):
|
|
131
142
|
filters: dict | None = None,
|
132
143
|
timeout: int | None = None,
|
133
144
|
**kwargs: Any,
|
134
|
-
) ->
|
145
|
+
) -> list[int]:
|
135
146
|
assert self.conn is not None
|
136
|
-
|
147
|
+
|
137
148
|
query_vector = np.array(query).astype(np.float32).tobytes()
|
138
|
-
|
149
|
+
ef_runtime = self.case_config.search_param()["params"]["ef"]
|
150
|
+
query_obj = (
|
151
|
+
Query(f"*=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
|
152
|
+
.sort_by("score")
|
153
|
+
.return_fields("id", "score")
|
154
|
+
.paging(0, k)
|
155
|
+
.dialect(2)
|
156
|
+
)
|
139
157
|
query_params = {"vec": query_vector}
|
140
|
-
|
158
|
+
|
141
159
|
if filters:
|
142
160
|
# benchmark test filters of format: {'metadata': '>=10000', 'id': 10000}
|
143
161
|
# gets exact match for id, and range for metadata if they exist in filters
|
144
162
|
id_value = filters.get("id")
|
145
163
|
metadata_value = filters.get("metadata")
|
146
164
|
if id_value and metadata_value:
|
147
|
-
query_obj =
|
165
|
+
query_obj = (
|
166
|
+
Query(
|
167
|
+
f"(@metadata:[{metadata_value} +inf] @id:{ {id_value} })=>[KNN {k} ",
|
168
|
+
f"@vector $vec EF_RUNTIME {ef_runtime} as score]",
|
169
|
+
)
|
170
|
+
.sort_by("score")
|
171
|
+
.return_fields("id", "score")
|
172
|
+
.paging(0, k)
|
173
|
+
.dialect(2)
|
174
|
+
)
|
148
175
|
elif id_value:
|
149
|
-
#gets exact match for id
|
150
|
-
query_obj =
|
151
|
-
|
152
|
-
|
176
|
+
# gets exact match for id
|
177
|
+
query_obj = (
|
178
|
+
Query(f"@id:{ {id_value} }=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
|
179
|
+
.sort_by("score")
|
180
|
+
.return_fields("id", "score")
|
181
|
+
.paging(0, k)
|
182
|
+
.dialect(2)
|
183
|
+
)
|
184
|
+
else: # metadata only case, greater than or equal to metadata value
|
185
|
+
query_obj = (
|
186
|
+
Query(f"@metadata:[{metadata_value} +inf]=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
|
187
|
+
.sort_by("score")
|
188
|
+
.return_fields("id", "score")
|
189
|
+
.paging(0, k)
|
190
|
+
.dialect(2)
|
191
|
+
)
|
153
192
|
res = self.conn.ft(INDEX_NAME).search(query_obj, query_params)
|
154
193
|
# doc in res of format {'id': '9831', 'payload': None, 'score': '1.19209289551e-07'}
|
155
194
|
return [int(doc["id"]) for doc in res.docs]
|
156
|
-
|
157
|
-
|
158
|
-
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import logging
|
2
|
+
from collections.abc import Generator
|
2
3
|
from contextlib import contextmanager
|
3
|
-
from typing import Any
|
4
|
+
from typing import Any
|
4
5
|
|
5
6
|
from ..api import DBCaseConfig, VectorDB
|
6
7
|
|
@@ -43,11 +44,10 @@ class Test(VectorDB):
|
|
43
44
|
embeddings: list[list[float]],
|
44
45
|
metadata: list[int],
|
45
46
|
**kwargs: Any,
|
46
|
-
) ->
|
47
|
+
) -> tuple[int, Exception | None]:
|
47
48
|
"""Insert embeddings into the database.
|
48
49
|
Should call self.init() first.
|
49
50
|
"""
|
50
|
-
raise RuntimeError("Not implemented")
|
51
51
|
return len(metadata), None
|
52
52
|
|
53
53
|
def search_embedding(
|
@@ -58,5 +58,4 @@ class Test(VectorDB):
|
|
58
58
|
timeout: int | None = None,
|
59
59
|
**kwargs: Any,
|
60
60
|
) -> list[int]:
|
61
|
-
|
62
|
-
return [i for i in range(k)]
|
61
|
+
return list(range(k))
|
@@ -14,7 +14,8 @@ from .. import DB
|
|
14
14
|
|
15
15
|
class WeaviateTypedDict(CommonTypedDict):
|
16
16
|
api_key: Annotated[
|
17
|
-
str,
|
17
|
+
str,
|
18
|
+
click.option("--api-key", type=str, help="Weaviate api key", required=True),
|
18
19
|
]
|
19
20
|
url: Annotated[
|
20
21
|
str,
|
@@ -34,8 +35,6 @@ def Weaviate(**parameters: Unpack[WeaviateTypedDict]):
|
|
34
35
|
api_key=SecretStr(parameters["api_key"]),
|
35
36
|
url=SecretStr(parameters["url"]),
|
36
37
|
),
|
37
|
-
db_case_config=WeaviateIndexConfig(
|
38
|
-
ef=256, efConstruction=256, maxConnections=16
|
39
|
-
),
|
38
|
+
db_case_config=WeaviateIndexConfig(ef=256, efConstruction=256, maxConnections=16),
|
40
39
|
**parameters,
|
41
40
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
from pydantic import BaseModel, SecretStr
|
2
2
|
|
3
|
-
from ..api import
|
3
|
+
from ..api import DBCaseConfig, DBConfig, MetricType
|
4
4
|
|
5
5
|
|
6
6
|
class WeaviateConfig(DBConfig):
|
@@ -23,7 +23,7 @@ class WeaviateIndexConfig(BaseModel, DBCaseConfig):
|
|
23
23
|
def parse_metric(self) -> str:
|
24
24
|
if self.metric_type == MetricType.L2:
|
25
25
|
return "l2-squared"
|
26
|
-
|
26
|
+
if self.metric_type == MetricType.IP:
|
27
27
|
return "dot"
|
28
28
|
return "cosine"
|
29
29
|
|
@@ -1,13 +1,13 @@
|
|
1
1
|
"""Wrapper around the Weaviate vector database over VectorDB"""
|
2
2
|
|
3
3
|
import logging
|
4
|
-
from
|
4
|
+
from collections.abc import Iterable
|
5
5
|
from contextlib import contextmanager
|
6
6
|
|
7
7
|
import weaviate
|
8
8
|
from weaviate.exceptions import WeaviateBaseError
|
9
9
|
|
10
|
-
from ..api import
|
10
|
+
from ..api import DBCaseConfig, VectorDB
|
11
11
|
|
12
12
|
log = logging.getLogger(__name__)
|
13
13
|
|
@@ -23,7 +23,13 @@ class WeaviateCloud(VectorDB):
|
|
23
23
|
**kwargs,
|
24
24
|
):
|
25
25
|
"""Initialize wrapper around the weaviate vector database."""
|
26
|
-
db_config.update(
|
26
|
+
db_config.update(
|
27
|
+
{
|
28
|
+
"auth_client_secret": weaviate.AuthApiKey(
|
29
|
+
api_key=db_config.get("auth_client_secret"),
|
30
|
+
),
|
31
|
+
},
|
32
|
+
)
|
27
33
|
self.db_config = db_config
|
28
34
|
self.case_config = db_case_config
|
29
35
|
self.collection_name = collection_name
|
@@ -33,6 +39,7 @@ class WeaviateCloud(VectorDB):
|
|
33
39
|
self._index_name = "vector_idx"
|
34
40
|
|
35
41
|
from weaviate import Client
|
42
|
+
|
36
43
|
client = Client(**db_config)
|
37
44
|
if drop_old:
|
38
45
|
try:
|
@@ -40,7 +47,7 @@ class WeaviateCloud(VectorDB):
|
|
40
47
|
log.info(f"weaviate client drop_old collection: {self.collection_name}")
|
41
48
|
client.schema.delete_class(self.collection_name)
|
42
49
|
except WeaviateBaseError as e:
|
43
|
-
log.warning(f"Failed to drop collection: {self.collection_name} error: {
|
50
|
+
log.warning(f"Failed to drop collection: {self.collection_name} error: {e!s}")
|
44
51
|
raise e from None
|
45
52
|
self._create_collection(client)
|
46
53
|
client = None
|
@@ -54,20 +61,23 @@ class WeaviateCloud(VectorDB):
|
|
54
61
|
>>> self.search_embedding()
|
55
62
|
"""
|
56
63
|
from weaviate import Client
|
64
|
+
|
57
65
|
self.client = Client(**self.db_config)
|
58
66
|
yield
|
59
67
|
self.client = None
|
60
|
-
del
|
68
|
+
del self.client
|
61
69
|
|
62
70
|
def ready_to_load(self):
|
63
71
|
"""Should call insert first, do nothing"""
|
64
|
-
pass
|
65
72
|
|
66
73
|
def optimize(self):
|
67
74
|
assert self.client.schema.exists(self.collection_name)
|
68
|
-
self.client.schema.update_config(
|
75
|
+
self.client.schema.update_config(
|
76
|
+
self.collection_name,
|
77
|
+
{"vectorIndexConfig": self.case_config.search_param()},
|
78
|
+
)
|
69
79
|
|
70
|
-
def _create_collection(self, client):
|
80
|
+
def _create_collection(self, client: weaviate.Client) -> None:
|
71
81
|
if not client.schema.exists(self.collection_name):
|
72
82
|
log.info(f"Create collection: {self.collection_name}")
|
73
83
|
class_obj = {
|
@@ -78,13 +88,13 @@ class WeaviateCloud(VectorDB):
|
|
78
88
|
"dataType": ["int"],
|
79
89
|
"name": self._scalar_field,
|
80
90
|
},
|
81
|
-
]
|
91
|
+
],
|
82
92
|
}
|
83
93
|
class_obj["vectorIndexConfig"] = self.case_config.index_param()
|
84
94
|
try:
|
85
95
|
client.schema.create_class(class_obj)
|
86
96
|
except WeaviateBaseError as e:
|
87
|
-
log.warning(f"Failed to create collection: {self.collection_name} error: {
|
97
|
+
log.warning(f"Failed to create collection: {self.collection_name} error: {e!s}")
|
88
98
|
raise e from None
|
89
99
|
|
90
100
|
def insert_embeddings(
|
@@ -102,15 +112,17 @@ class WeaviateCloud(VectorDB):
|
|
102
112
|
batch.dynamic = True
|
103
113
|
res = []
|
104
114
|
for i in range(len(metadata)):
|
105
|
-
res.append(
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
115
|
+
res.append(
|
116
|
+
batch.add_data_object(
|
117
|
+
{self._scalar_field: metadata[i]},
|
118
|
+
class_name=self.collection_name,
|
119
|
+
vector=embeddings[i],
|
120
|
+
),
|
121
|
+
)
|
110
122
|
insert_count += 1
|
111
123
|
return (len(res), None)
|
112
124
|
except WeaviateBaseError as e:
|
113
|
-
log.warning(f"Failed to insert data, error: {
|
125
|
+
log.warning(f"Failed to insert data, error: {e!s}")
|
114
126
|
return (insert_count, e)
|
115
127
|
|
116
128
|
def search_embedding(
|
@@ -125,12 +137,17 @@ class WeaviateCloud(VectorDB):
|
|
125
137
|
"""
|
126
138
|
assert self.client.schema.exists(self.collection_name)
|
127
139
|
|
128
|
-
query_obj =
|
140
|
+
query_obj = (
|
141
|
+
self.client.query.get(self.collection_name, [self._scalar_field])
|
142
|
+
.with_additional("distance")
|
143
|
+
.with_near_vector({"vector": query})
|
144
|
+
.with_limit(k)
|
145
|
+
)
|
129
146
|
if filters:
|
130
147
|
where_filter = {
|
131
148
|
"path": "key",
|
132
149
|
"operator": "GreaterThanEqual",
|
133
|
-
"valueInt": filters.get(
|
150
|
+
"valueInt": filters.get("id"),
|
134
151
|
}
|
135
152
|
query_obj = query_obj.with_where(where_filter)
|
136
153
|
|
@@ -138,7 +155,4 @@ class WeaviateCloud(VectorDB):
|
|
138
155
|
res = query_obj.do()
|
139
156
|
|
140
157
|
# Organize results.
|
141
|
-
|
142
|
-
|
143
|
-
return ret
|
144
|
-
|
158
|
+
return [result[self._scalar_field] for result in res["data"]["Get"][self.collection_name]]
|
@@ -1,33 +1,36 @@
|
|
1
|
+
import os
|
1
2
|
from typing import Annotated, Unpack
|
2
3
|
|
3
4
|
import click
|
4
|
-
import os
|
5
5
|
from pydantic import SecretStr
|
6
6
|
|
7
|
+
from vectordb_bench.backend.clients import DB
|
7
8
|
from vectordb_bench.cli.cli import (
|
8
9
|
CommonTypedDict,
|
9
10
|
cli,
|
10
11
|
click_parameter_decorators_from_typed_dict,
|
11
12
|
run,
|
12
13
|
)
|
13
|
-
from vectordb_bench.backend.clients import DB
|
14
14
|
|
15
15
|
|
16
16
|
class ZillizTypedDict(CommonTypedDict):
|
17
17
|
uri: Annotated[
|
18
|
-
str,
|
18
|
+
str,
|
19
|
+
click.option("--uri", type=str, help="uri connection string", required=True),
|
19
20
|
]
|
20
21
|
user_name: Annotated[
|
21
|
-
str,
|
22
|
+
str,
|
23
|
+
click.option("--user-name", type=str, help="Db username", required=True),
|
22
24
|
]
|
23
25
|
password: Annotated[
|
24
26
|
str,
|
25
|
-
click.option(
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
click.option(
|
28
|
+
"--password",
|
29
|
+
type=str,
|
30
|
+
help="Zilliz password",
|
31
|
+
default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
|
32
|
+
show_default="$ZILLIZ_PASSWORD",
|
33
|
+
),
|
31
34
|
]
|
32
35
|
level: Annotated[
|
33
36
|
str,
|
@@ -38,7 +41,7 @@ class ZillizTypedDict(CommonTypedDict):
|
|
38
41
|
@cli.command()
|
39
42
|
@click_parameter_decorators_from_typed_dict(ZillizTypedDict)
|
40
43
|
def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
|
41
|
-
from .config import
|
44
|
+
from .config import AutoIndexConfig, ZillizCloudConfig
|
42
45
|
|
43
46
|
run(
|
44
47
|
db=DB.ZillizCloud,
|
@@ -1,7 +1,7 @@
|
|
1
1
|
from pydantic import SecretStr
|
2
2
|
|
3
3
|
from ..api import DBCaseConfig, DBConfig
|
4
|
-
from ..milvus.config import
|
4
|
+
from ..milvus.config import IndexType, MilvusIndexConfig
|
5
5
|
|
6
6
|
|
7
7
|
class ZillizCloudConfig(DBConfig):
|
@@ -33,7 +33,5 @@ class AutoIndexConfig(MilvusIndexConfig, DBCaseConfig):
|
|
33
33
|
"metric_type": self.parse_metric(),
|
34
34
|
"params": {
|
35
35
|
"level": self.level,
|
36
|
-
}
|
36
|
+
},
|
37
37
|
}
|
38
|
-
|
39
|
-
|