vectordb-bench 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/clients/__init__.py +4 -4
- vectordb_bench/backend/clients/pgvecto_rs/config.py +44 -32
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +16 -16
- vectordb_bench/backend/clients/pgvector/config.py +48 -9
- vectordb_bench/backend/clients/pgvector/pgvector.py +18 -4
- vectordb_bench/backend/clients/qdrant_cloud/config.py +19 -6
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +11 -7
- vectordb_bench/frontend/const/dbCaseConfigs.py +10 -1
- vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +8 -0
- vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +9 -3
- vectordb_bench/results/ZillizCloud/{result_20240105_beta_202401_zillizcloud.json → result_20240105_standard_202401_zillizcloud.json} +365 -41
- vectordb_bench/results/getLeaderboardData.py +1 -1
- vectordb_bench/results/leaderboard.json +1 -1
- {vectordb_bench-0.0.7.dist-info → vectordb_bench-0.0.8.dist-info}/METADATA +2 -2
- {vectordb_bench-0.0.7.dist-info → vectordb_bench-0.0.8.dist-info}/RECORD +19 -19
- {vectordb_bench-0.0.7.dist-info → vectordb_bench-0.0.8.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.7.dist-info → vectordb_bench-0.0.8.dist-info}/WHEEL +0 -0
- {vectordb_bench-0.0.7.dist-info → vectordb_bench-0.0.8.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.7.dist-info → vectordb_bench-0.0.8.dist-info}/top_level.txt +0 -0
@@ -54,8 +54,8 @@ class DB(Enum):
|
|
54
54
|
return ElasticCloud
|
55
55
|
|
56
56
|
if self == DB.QdrantCloud:
|
57
|
-
from .qdrant_cloud.qdrant_cloud import
|
58
|
-
return
|
57
|
+
from .qdrant_cloud.qdrant_cloud import QdrantCloud
|
58
|
+
return QdrantCloud
|
59
59
|
|
60
60
|
if self == DB.WeaviateCloud:
|
61
61
|
from .weaviate_cloud.weaviate_cloud import WeaviateCloud
|
@@ -142,8 +142,8 @@ class DB(Enum):
|
|
142
142
|
return WeaviateIndexConfig
|
143
143
|
|
144
144
|
if self == DB.PgVector:
|
145
|
-
from .pgvector.config import
|
146
|
-
return
|
145
|
+
from .pgvector.config import _pgvector_case_config
|
146
|
+
return _pgvector_case_config.get(index_type)
|
147
147
|
|
148
148
|
if self == DB.PgVectoRS:
|
149
149
|
from .pgvecto_rs.config import _pgvecto_rs_case_config
|
@@ -8,42 +8,30 @@ POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
|
|
8
8
|
class PgVectoRSConfig(DBConfig):
|
9
9
|
user_name: SecretStr = "postgres"
|
10
10
|
password: SecretStr
|
11
|
-
|
11
|
+
host: str = "localhost"
|
12
|
+
port: int = 5432
|
12
13
|
db_name: str
|
13
14
|
|
14
15
|
def to_dict(self) -> dict:
|
15
16
|
user_str = self.user_name.get_secret_value()
|
16
17
|
pwd_str = self.password.get_secret_value()
|
17
|
-
url_str = self.url.get_secret_value()
|
18
|
-
host, port = url_str.split(":")
|
19
18
|
return {
|
20
|
-
"host": host,
|
21
|
-
"port": port,
|
19
|
+
"host": self.host,
|
20
|
+
"port": self.port,
|
22
21
|
"dbname": self.db_name,
|
23
22
|
"user": user_str,
|
24
|
-
"password": pwd_str
|
23
|
+
"password": pwd_str
|
25
24
|
}
|
26
25
|
|
27
|
-
|
28
26
|
class PgVectoRSIndexConfig(BaseModel, DBCaseConfig):
|
29
27
|
metric_type: MetricType | None = None
|
30
|
-
quantizationType: Literal["trivial", "scalar", "product"]
|
31
|
-
quantizationRatio: None | Literal["x4", "x8", "x16", "x32", "x64"]
|
32
|
-
|
33
|
-
def parse_quantization(self) -> str:
|
34
|
-
if self.quantizationType == "trivial":
|
35
|
-
return "quantization = { trivial = { } }"
|
36
|
-
elif self.quantizationType == "scalar":
|
37
|
-
return "quantization = { scalar = { } }"
|
38
|
-
else:
|
39
|
-
return f'quantization = {{ product = {{ ratio = "{self.quantizationRatio}" }} }}'
|
40
28
|
|
41
29
|
def parse_metric(self) -> str:
|
42
30
|
if self.metric_type == MetricType.L2:
|
43
|
-
return "
|
31
|
+
return "vector_l2_ops"
|
44
32
|
elif self.metric_type == MetricType.IP:
|
45
|
-
return "
|
46
|
-
return "
|
33
|
+
return "vector_dot_ops"
|
34
|
+
return "vector_cos_ops"
|
47
35
|
|
48
36
|
def parse_metric_fun_op(self) -> str:
|
49
37
|
if self.metric_type == MetricType.L2:
|
@@ -52,16 +40,27 @@ class PgVectoRSIndexConfig(BaseModel, DBCaseConfig):
|
|
52
40
|
return "<#>"
|
53
41
|
return "<=>"
|
54
42
|
|
43
|
+
class PgVectoRSQuantConfig(PgVectoRSIndexConfig):
|
44
|
+
quantizationType: Literal["trivial", "scalar", "product"]
|
45
|
+
quantizationRatio: None | Literal["x4", "x8", "x16", "x32", "x64"]
|
55
46
|
|
56
|
-
|
47
|
+
def parse_quantization(self) -> str:
|
48
|
+
if self.quantizationType == "trivial":
|
49
|
+
return "quantization = { trivial = { } }"
|
50
|
+
elif self.quantizationType == "scalar":
|
51
|
+
return "quantization = { scalar = { } }"
|
52
|
+
else:
|
53
|
+
return f'quantization = {{ product = {{ ratio = "{self.quantizationRatio}" }} }}'
|
54
|
+
|
55
|
+
|
56
|
+
class HNSWConfig(PgVectoRSQuantConfig):
|
57
57
|
M: int
|
58
58
|
efConstruction: int
|
59
59
|
index: IndexType = IndexType.HNSW
|
60
60
|
|
61
61
|
def index_param(self) -> dict:
|
62
62
|
options = f"""
|
63
|
-
|
64
|
-
[algorithm.hnsw]
|
63
|
+
[indexing.hnsw]
|
65
64
|
m = {self.M}
|
66
65
|
ef_construction = {self.efConstruction}
|
67
66
|
{self.parse_quantization()}
|
@@ -72,17 +71,16 @@ ef_construction = {self.efConstruction}
|
|
72
71
|
return {"metrics_op": self.parse_metric_fun_op()}
|
73
72
|
|
74
73
|
|
75
|
-
class IVFFlatConfig(
|
74
|
+
class IVFFlatConfig(PgVectoRSQuantConfig):
|
76
75
|
nlist: int
|
77
76
|
nprobe: int | None = None
|
78
77
|
index: IndexType = IndexType.IVFFlat
|
79
78
|
|
80
79
|
def index_param(self) -> dict:
|
81
80
|
options = f"""
|
82
|
-
|
83
|
-
[algorithm.ivf]
|
81
|
+
[indexing.ivf]
|
84
82
|
nlist = {self.nlist}
|
85
|
-
|
83
|
+
nsample = {self.nprobe if self.nprobe else 10}
|
86
84
|
{self.parse_quantization()}
|
87
85
|
"""
|
88
86
|
return {"options": options, "metric": self.parse_metric()}
|
@@ -90,14 +88,29 @@ nprob = {self.nprobe if self.nprobe else 10}
|
|
90
88
|
def search_param(self) -> dict:
|
91
89
|
return {"metrics_op": self.parse_metric_fun_op()}
|
92
90
|
|
91
|
+
class IVFFlatSQ8Config(PgVectoRSIndexConfig):
|
92
|
+
nlist: int
|
93
|
+
nprobe: int | None = None
|
94
|
+
index: IndexType = IndexType.IVFSQ8
|
95
|
+
|
96
|
+
def index_param(self) -> dict:
|
97
|
+
options = f"""
|
98
|
+
[indexing.ivf]
|
99
|
+
nlist = {self.nlist}
|
100
|
+
nsample = {self.nprobe if self.nprobe else 10}
|
101
|
+
quantization = {{ scalar = {{ }} }}
|
102
|
+
"""
|
103
|
+
return {"options": options, "metric": self.parse_metric()}
|
104
|
+
|
105
|
+
def search_param(self) -> dict:
|
106
|
+
return {"metrics_op": self.parse_metric_fun_op()}
|
93
107
|
|
94
|
-
class FLATConfig(
|
108
|
+
class FLATConfig(PgVectoRSQuantConfig):
|
95
109
|
index: IndexType = IndexType.Flat
|
96
110
|
|
97
111
|
def index_param(self) -> dict:
|
98
112
|
options = f"""
|
99
|
-
|
100
|
-
[algorithm.flat]
|
113
|
+
[indexing.flat]
|
101
114
|
{self.parse_quantization()}
|
102
115
|
"""
|
103
116
|
return {"options": options, "metric": self.parse_metric()}
|
@@ -107,9 +120,8 @@ capacity = 1048576
|
|
107
120
|
|
108
121
|
|
109
122
|
_pgvecto_rs_case_config = {
|
110
|
-
IndexType.AUTOINDEX: HNSWConfig,
|
111
123
|
IndexType.HNSW: HNSWConfig,
|
112
|
-
IndexType.DISKANN: HNSWConfig,
|
113
124
|
IndexType.IVFFlat: IVFFlatConfig,
|
125
|
+
IndexType.IVFSQ8: IVFFlatSQ8Config,
|
114
126
|
IndexType.Flat: FLATConfig,
|
115
127
|
}
|
@@ -1,18 +1,17 @@
|
|
1
|
-
"""Wrapper around the
|
1
|
+
"""Wrapper around the Pgvecto.rs vector database over VectorDB"""
|
2
2
|
|
3
3
|
import io
|
4
4
|
import logging
|
5
5
|
from contextlib import contextmanager
|
6
6
|
from typing import Any
|
7
7
|
import pandas as pd
|
8
|
-
|
9
8
|
import psycopg2
|
9
|
+
import psycopg2.extras
|
10
10
|
|
11
11
|
from ..api import VectorDB, DBCaseConfig
|
12
12
|
|
13
13
|
log = logging.getLogger(__name__)
|
14
14
|
|
15
|
-
|
16
15
|
class PgVectoRS(VectorDB):
|
17
16
|
"""Use SQLAlchemy instructions"""
|
18
17
|
|
@@ -66,6 +65,8 @@ class PgVectoRS(VectorDB):
|
|
66
65
|
self.conn = psycopg2.connect(**self.db_config)
|
67
66
|
self.conn.autocommit = False
|
68
67
|
self.cursor = self.conn.cursor()
|
68
|
+
self.cursor.execute('SET search_path = "$user", public, vectors')
|
69
|
+
self.conn.commit()
|
69
70
|
|
70
71
|
try:
|
71
72
|
yield
|
@@ -113,7 +114,7 @@ class PgVectoRS(VectorDB):
|
|
113
114
|
self.conn.commit()
|
114
115
|
except Exception as e:
|
115
116
|
log.warning(
|
116
|
-
f"Failed to create
|
117
|
+
f"Failed to create pgvecto.rs table: {self.table_name} error: {e}"
|
117
118
|
)
|
118
119
|
raise e from None
|
119
120
|
|
@@ -127,13 +128,10 @@ class PgVectoRS(VectorDB):
|
|
127
128
|
f'CREATE TABLE IF NOT EXISTS public."{self.table_name}" \
|
128
129
|
(id Integer PRIMARY KEY, embedding vector({dim}));'
|
129
130
|
)
|
130
|
-
self.cursor.execute(
|
131
|
-
f'ALTER TABLE public."{self.table_name}" ALTER COLUMN embedding SET STORAGE PLAIN;'
|
132
|
-
)
|
133
131
|
self.conn.commit()
|
134
132
|
except Exception as e:
|
135
133
|
log.warning(
|
136
|
-
f"Failed to create
|
134
|
+
f"Failed to create pgvecto.rs table: {self.table_name} error: {e}"
|
137
135
|
)
|
138
136
|
raise e from None
|
139
137
|
|
@@ -146,22 +144,24 @@ class PgVectoRS(VectorDB):
|
|
146
144
|
assert self.conn is not None, "Connection is not initialized"
|
147
145
|
assert self.cursor is not None, "Cursor is not initialized"
|
148
146
|
|
147
|
+
assert self.conn is not None, "Connection is not initialized"
|
148
|
+
assert self.cursor is not None, "Cursor is not initialized"
|
149
|
+
|
149
150
|
try:
|
150
|
-
items = {
|
151
|
+
items = {
|
152
|
+
"id": metadata,
|
153
|
+
"embedding": embeddings
|
154
|
+
}
|
151
155
|
df = pd.DataFrame(items)
|
152
156
|
csv_buffer = io.StringIO()
|
153
157
|
df.to_csv(csv_buffer, index=False, header=False)
|
154
158
|
csv_buffer.seek(0)
|
155
|
-
self.cursor.copy_expert(
|
156
|
-
f'COPY public."{self.table_name}" FROM STDIN WITH (FORMAT CSV)',
|
157
|
-
csv_buffer,
|
158
|
-
)
|
159
|
+
self.cursor.copy_expert(f"COPY public.\"{self.table_name}\" FROM STDIN WITH (FORMAT CSV)", csv_buffer)
|
159
160
|
self.conn.commit()
|
160
161
|
return len(metadata), None
|
161
162
|
except Exception as e:
|
162
|
-
log.warning(
|
163
|
-
|
164
|
-
)
|
163
|
+
log.warning(f"Failed to insert data into pgvecto.rs table ({self.table_name}), error: {e}")
|
164
|
+
return 0, e
|
165
165
|
|
166
166
|
def search_embedding(
|
167
167
|
self,
|
@@ -1,5 +1,5 @@
|
|
1
1
|
from pydantic import BaseModel, SecretStr
|
2
|
-
from ..api import DBConfig, DBCaseConfig, MetricType
|
2
|
+
from ..api import DBConfig, DBCaseConfig, IndexType, MetricType
|
3
3
|
|
4
4
|
POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
|
5
5
|
|
@@ -23,39 +23,78 @@ class PgVectorConfig(DBConfig):
|
|
23
23
|
|
24
24
|
class PgVectorIndexConfig(BaseModel, DBCaseConfig):
|
25
25
|
metric_type: MetricType | None = None
|
26
|
-
|
27
|
-
probes: int | None = 10
|
26
|
+
index: IndexType
|
28
27
|
|
29
|
-
def parse_metric(self) -> str:
|
28
|
+
def parse_metric(self) -> str:
|
30
29
|
if self.metric_type == MetricType.L2:
|
31
30
|
return "vector_l2_ops"
|
32
31
|
elif self.metric_type == MetricType.IP:
|
33
32
|
return "vector_ip_ops"
|
34
33
|
return "vector_cosine_ops"
|
35
|
-
|
34
|
+
|
36
35
|
def parse_metric_fun_op(self) -> str:
|
37
36
|
if self.metric_type == MetricType.L2:
|
38
37
|
return "<->"
|
39
38
|
elif self.metric_type == MetricType.IP:
|
40
39
|
return "<#>"
|
41
40
|
return "<=>"
|
42
|
-
|
43
|
-
def parse_metric_fun_str(self) -> str:
|
41
|
+
|
42
|
+
def parse_metric_fun_str(self) -> str:
|
44
43
|
if self.metric_type == MetricType.L2:
|
45
44
|
return "l2_distance"
|
46
45
|
elif self.metric_type == MetricType.IP:
|
47
46
|
return "max_inner_product"
|
48
47
|
return "cosine_distance"
|
49
48
|
|
49
|
+
|
50
|
+
|
51
|
+
class HNSWConfig(PgVectorIndexConfig):
|
52
|
+
M: int
|
53
|
+
efConstruction: int
|
54
|
+
ef: int | None = None
|
55
|
+
index: IndexType = IndexType.HNSW
|
56
|
+
|
57
|
+
def index_param(self) -> dict:
|
58
|
+
return {
|
59
|
+
"metric_type": self.parse_metric(),
|
60
|
+
"index_type": self.index.value,
|
61
|
+
"params": {"M": self.M, "efConstruction": self.efConstruction},
|
62
|
+
}
|
63
|
+
|
64
|
+
def index_param(self) -> dict:
|
65
|
+
return {
|
66
|
+
"m" : self.M,
|
67
|
+
"efConstruction" : self.efConstruction,
|
68
|
+
"metric" : self.parse_metric()
|
69
|
+
}
|
70
|
+
|
71
|
+
def search_param(self) -> dict:
|
72
|
+
return {
|
73
|
+
"ef" : self.ef,
|
74
|
+
"metric_fun" : self.parse_metric_fun_str(),
|
75
|
+
"metric_fun_op" : self.parse_metric_fun_op(),
|
76
|
+
}
|
77
|
+
|
78
|
+
|
79
|
+
class IVFFlatConfig(PgVectorIndexConfig):
|
80
|
+
lists: int | None = 1000
|
81
|
+
probes: int | None = 10
|
82
|
+
index: IndexType = IndexType.IVFFlat
|
83
|
+
|
50
84
|
def index_param(self) -> dict:
|
51
85
|
return {
|
52
86
|
"lists" : self.lists,
|
53
87
|
"metric" : self.parse_metric()
|
54
88
|
}
|
55
|
-
|
89
|
+
|
56
90
|
def search_param(self) -> dict:
|
57
91
|
return {
|
58
92
|
"probes" : self.probes,
|
59
93
|
"metric_fun" : self.parse_metric_fun_str(),
|
60
94
|
"metric_fun_op" : self.parse_metric_fun_op(),
|
61
|
-
}
|
95
|
+
}
|
96
|
+
|
97
|
+
_pgvector_case_config = {
|
98
|
+
IndexType.HNSW: HNSWConfig,
|
99
|
+
IndexType.IVFFlat: IVFFlatConfig,
|
100
|
+
}
|
@@ -8,7 +8,7 @@ import pandas as pd
|
|
8
8
|
import psycopg2
|
9
9
|
import psycopg2.extras
|
10
10
|
|
11
|
-
from ..api import VectorDB, DBCaseConfig
|
11
|
+
from ..api import IndexType, VectorDB, DBCaseConfig
|
12
12
|
|
13
13
|
log = logging.getLogger(__name__)
|
14
14
|
|
@@ -108,7 +108,14 @@ class PgVector(VectorDB):
|
|
108
108
|
assert self.cursor is not None, "Cursor is not initialized"
|
109
109
|
|
110
110
|
index_param = self.case_config.index_param()
|
111
|
-
self.
|
111
|
+
if self.case_config.index == IndexType.HNSW:
|
112
|
+
log.debug(f'Creating HNSW index. m={index_param["m"]}, ef_construction={index_param["ef_construction"]}')
|
113
|
+
self.cursor.execute(f'CREATE INDEX IF NOT EXISTS {self._index_name} ON public."{self.table_name}" USING hnsw (embedding {index_param["metric"]}) WITH (m={index_param["m"]}, ef_construction={index_param["ef_construction"]});')
|
114
|
+
elif self.case_config.index == IndexType.IVFFlat:
|
115
|
+
log.debug(f'Creating IVFFLAT index. list={index_param["lists"]}')
|
116
|
+
self.cursor.execute(f'CREATE INDEX IF NOT EXISTS {self._index_name} ON public."{self.table_name}" USING ivfflat (embedding {index_param["metric"]}) WITH (lists={index_param["lists"]});')
|
117
|
+
else:
|
118
|
+
assert "Invalid index type {self.case_config.index}"
|
112
119
|
self.conn.commit()
|
113
120
|
|
114
121
|
def _create_table(self, dim : int):
|
@@ -164,8 +171,15 @@ class PgVector(VectorDB):
|
|
164
171
|
assert self.cursor is not None, "Cursor is not initialized"
|
165
172
|
|
166
173
|
search_param =self.case_config.search_param()
|
167
|
-
|
168
|
-
self.
|
174
|
+
|
175
|
+
if self.case_config.index == IndexType.HNSW:
|
176
|
+
self.cursor.execute(f'SET hnsw.ef_search = {search_param["ef"]}')
|
177
|
+
self.cursor.execute(f"SELECT id FROM public.\"{self.table_name}\" ORDER BY embedding {search_param['metric_fun_op']} '{query}' LIMIT {k};")
|
178
|
+
elif self.case_config.index == IndexType.IVFFlat:
|
179
|
+
self.cursor.execute(f'SET ivfflat.probes = {search_param["probes"]}')
|
180
|
+
self.cursor.execute(f"SELECT id FROM public.\"{self.table_name}\" ORDER BY embedding {search_param['metric_fun_op']} '{query}' LIMIT {k};")
|
181
|
+
else:
|
182
|
+
assert "Invalid index type {self.case_config.index}"
|
169
183
|
self.conn.commit()
|
170
184
|
result = self.cursor.fetchall()
|
171
185
|
|
@@ -1,18 +1,31 @@
|
|
1
1
|
from pydantic import BaseModel, SecretStr
|
2
2
|
|
3
3
|
from ..api import DBConfig, DBCaseConfig, MetricType
|
4
|
+
from pydantic import validator
|
4
5
|
|
5
|
-
|
6
|
+
# Allowing `api_key` to be left empty, to ensure compatibility with the open-source Qdrant.
|
6
7
|
class QdrantConfig(DBConfig):
|
7
8
|
url: SecretStr
|
8
9
|
api_key: SecretStr
|
9
10
|
|
10
11
|
def to_dict(self) -> dict:
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
api_key = self.api_key.get_secret_value()
|
13
|
+
if len(api_key) > 0:
|
14
|
+
return {
|
15
|
+
"url": self.url.get_secret_value(),
|
16
|
+
"api_key": self.api_key.get_secret_value(),
|
17
|
+
"prefer_grpc": True,
|
18
|
+
}
|
19
|
+
else:
|
20
|
+
return {"url": self.url.get_secret_value(),}
|
21
|
+
|
22
|
+
@validator("*")
|
23
|
+
def not_empty_field(cls, v, field):
|
24
|
+
if field.name in ["api_key", "db_label"]:
|
25
|
+
return v
|
26
|
+
if isinstance(v, (str, SecretStr)) and len(v) == 0:
|
27
|
+
raise ValueError("Empty string!")
|
28
|
+
return v
|
16
29
|
|
17
30
|
class QdrantIndexConfig(BaseModel, DBCaseConfig):
|
18
31
|
metric_type: MetricType | None = None
|
@@ -43,8 +43,7 @@ class QdrantCloud(VectorDB):
|
|
43
43
|
if drop_old:
|
44
44
|
log.info(f"QdrantCloud client drop_old collection: {self.collection_name}")
|
45
45
|
tmp_client.delete_collection(self.collection_name)
|
46
|
-
|
47
|
-
self._create_collection(dim, tmp_client)
|
46
|
+
self._create_collection(dim, tmp_client)
|
48
47
|
tmp_client = None
|
49
48
|
|
50
49
|
@contextmanager
|
@@ -110,13 +109,18 @@ class QdrantCloud(VectorDB):
|
|
110
109
|
) -> (int, Exception):
|
111
110
|
"""Insert embeddings into Milvus. should call self.init() first"""
|
112
111
|
assert self.qdrant_client is not None
|
112
|
+
QDRANT_BATCH_SIZE = 500
|
113
113
|
try:
|
114
114
|
# TODO: counts
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
115
|
+
for offset in range(0, len(embeddings), QDRANT_BATCH_SIZE):
|
116
|
+
vectors = embeddings[offset: offset + QDRANT_BATCH_SIZE]
|
117
|
+
ids = metadata[offset: offset + QDRANT_BATCH_SIZE]
|
118
|
+
payloads=[{self._primary_field: v} for v in ids]
|
119
|
+
_ = self.qdrant_client.upsert(
|
120
|
+
collection_name=self.collection_name,
|
121
|
+
wait=True,
|
122
|
+
points=Batch(ids=ids, payloads=payloads, vectors=vectors),
|
123
|
+
)
|
120
124
|
except Exception as e:
|
121
125
|
log.info(f"Failed to insert data, {e}")
|
122
126
|
return 0, e
|
@@ -397,6 +397,11 @@ CaseConfigParamInput_QuantizationType_PgVectoRS = CaseConfigInput(
|
|
397
397
|
inputConfig={
|
398
398
|
"options": ["trivial", "scalar", "product"],
|
399
399
|
},
|
400
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
401
|
+
in [
|
402
|
+
IndexType.HNSW.value,
|
403
|
+
IndexType.IVFFlat.value,
|
404
|
+
],
|
400
405
|
)
|
401
406
|
|
402
407
|
CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
|
@@ -406,7 +411,11 @@ CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
|
|
406
411
|
"options": ["x4", "x8", "x16", "x32", "x64"],
|
407
412
|
},
|
408
413
|
isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
|
409
|
-
== "product",
|
414
|
+
== "product" and config.get(CaseConfigParamType.IndexType, None)
|
415
|
+
in [
|
416
|
+
IndexType.HNSW.value,
|
417
|
+
IndexType.IVFFlat.value,
|
418
|
+
],
|
410
419
|
)
|
411
420
|
|
412
421
|
CaseConfigParamInput_ZillizLevel = CaseConfigInput(
|
@@ -20,6 +20,7 @@
|
|
20
20
|
"db_name": "**********"
|
21
21
|
},
|
22
22
|
"db_case_config": {
|
23
|
+
"index": "IVF_FLAT",
|
23
24
|
"metric_type": "L2",
|
24
25
|
"lists": 10,
|
25
26
|
"probes": 2
|
@@ -49,6 +50,7 @@
|
|
49
50
|
"db_name": "**********"
|
50
51
|
},
|
51
52
|
"db_case_config": {
|
53
|
+
"index": "IVF_FLAT",
|
52
54
|
"metric_type": "L2",
|
53
55
|
"lists": 10,
|
54
56
|
"probes": 2
|
@@ -78,6 +80,7 @@
|
|
78
80
|
"db_name": "**********"
|
79
81
|
},
|
80
82
|
"db_case_config": {
|
83
|
+
"index": "IVF_FLAT",
|
81
84
|
"metric_type": "COSINE",
|
82
85
|
"lists": 10,
|
83
86
|
"probes": 2
|
@@ -107,6 +110,7 @@
|
|
107
110
|
"db_name": "**********"
|
108
111
|
},
|
109
112
|
"db_case_config": {
|
113
|
+
"index": "IVF_FLAT",
|
110
114
|
"metric_type": "COSINE",
|
111
115
|
"lists": 10,
|
112
116
|
"probes": 2
|
@@ -136,6 +140,7 @@
|
|
136
140
|
"db_name": "**********"
|
137
141
|
},
|
138
142
|
"db_case_config": {
|
143
|
+
"index": "IVF_FLAT",
|
139
144
|
"metric_type": "COSINE",
|
140
145
|
"lists": 10,
|
141
146
|
"probes": 2
|
@@ -165,6 +170,7 @@
|
|
165
170
|
"db_name": "**********"
|
166
171
|
},
|
167
172
|
"db_case_config": {
|
173
|
+
"index": "IVF_FLAT",
|
168
174
|
"metric_type": "COSINE",
|
169
175
|
"lists": 10,
|
170
176
|
"probes": 2
|
@@ -194,6 +200,7 @@
|
|
194
200
|
"db_name": "**********"
|
195
201
|
},
|
196
202
|
"db_case_config": {
|
203
|
+
"index": "IVF_FLAT",
|
197
204
|
"metric_type": "COSINE",
|
198
205
|
"lists": 10,
|
199
206
|
"probes": 2
|
@@ -223,6 +230,7 @@
|
|
223
230
|
"db_name": "**********"
|
224
231
|
},
|
225
232
|
"db_case_config": {
|
233
|
+
"index": "IVF_FLAT",
|
226
234
|
"metric_type": "COSINE",
|
227
235
|
"lists": 10,
|
228
236
|
"probes": 2
|
@@ -20,6 +20,7 @@
|
|
20
20
|
"db_name": "**********"
|
21
21
|
},
|
22
22
|
"db_case_config": {
|
23
|
+
"index": "IVF_FLAT",
|
23
24
|
"metric_type": "L2",
|
24
25
|
"lists": 10,
|
25
26
|
"probes": 2
|
@@ -51,7 +52,8 @@
|
|
51
52
|
"db_case_config": {
|
52
53
|
"metric_type": "L2",
|
53
54
|
"lists": 10,
|
54
|
-
"probes": 2
|
55
|
+
"probes": 2,
|
56
|
+
"index": "IVF_FLAT"
|
55
57
|
},
|
56
58
|
"case_config": {
|
57
59
|
"case_id": 11,
|
@@ -80,7 +82,8 @@
|
|
80
82
|
"db_case_config": {
|
81
83
|
"metric_type": "L2",
|
82
84
|
"lists": 10,
|
83
|
-
"probes": 2
|
85
|
+
"probes": 2,
|
86
|
+
"index": "IVF_FLAT"
|
84
87
|
},
|
85
88
|
"case_config": {
|
86
89
|
"case_id": 12,
|
@@ -107,6 +110,7 @@
|
|
107
110
|
"db_name": "**********"
|
108
111
|
},
|
109
112
|
"db_case_config": {
|
113
|
+
"index": "IVF_FLAT",
|
110
114
|
"metric_type": "L2",
|
111
115
|
"lists": 10,
|
112
116
|
"probes": 2
|
@@ -136,6 +140,7 @@
|
|
136
140
|
"db_name": "**********"
|
137
141
|
},
|
138
142
|
"db_case_config": {
|
143
|
+
"index": "IVF_FLAT",
|
139
144
|
"metric_type": "L2",
|
140
145
|
"lists": 10,
|
141
146
|
"probes": 2
|
@@ -165,6 +170,7 @@
|
|
165
170
|
"db_name": "**********"
|
166
171
|
},
|
167
172
|
"db_case_config": {
|
173
|
+
"index": "IVF_FLAT",
|
168
174
|
"metric_type": "L2",
|
169
175
|
"lists": 10,
|
170
176
|
"probes": 2
|
@@ -178,4 +184,4 @@
|
|
178
184
|
}
|
179
185
|
],
|
180
186
|
"file_fmt": "result_{}_{}_{}.json"
|
181
|
-
}
|
187
|
+
}
|