veadk-python 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of veadk-python might be problematic. Click here for more details.
- veadk/agent.py +3 -2
- veadk/auth/veauth/opensearch_veauth.py +75 -0
- veadk/auth/veauth/postgresql_veauth.py +75 -0
- veadk/cli/cli.py +3 -1
- veadk/cli/cli_eval.py +160 -0
- veadk/cli/cli_prompt.py +9 -2
- veadk/cli/cli_web.py +6 -1
- veadk/configs/database_configs.py +43 -0
- veadk/configs/model_configs.py +32 -0
- veadk/consts.py +11 -4
- veadk/evaluation/adk_evaluator/adk_evaluator.py +5 -2
- veadk/evaluation/base_evaluator.py +95 -68
- veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +23 -15
- veadk/evaluation/eval_set_recorder.py +2 -2
- veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +9 -3
- veadk/integrations/ve_tls/utils.py +1 -2
- veadk/integrations/ve_tls/ve_tls.py +9 -5
- veadk/integrations/ve_tos/ve_tos.py +542 -68
- veadk/knowledgebase/backends/base_backend.py +59 -0
- veadk/knowledgebase/backends/in_memory_backend.py +82 -0
- veadk/knowledgebase/backends/opensearch_backend.py +136 -0
- veadk/knowledgebase/backends/redis_backend.py +144 -0
- veadk/knowledgebase/backends/utils.py +91 -0
- veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +524 -0
- veadk/{database/__init__.py → knowledgebase/entry.py} +10 -2
- veadk/knowledgebase/knowledgebase.py +120 -139
- veadk/memory/__init__.py +22 -0
- veadk/memory/long_term_memory.py +124 -41
- veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
- veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
- veadk/memory/long_term_memory_backends/mem0_backend.py +129 -0
- veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
- veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
- veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
- veadk/memory/short_term_memory.py +80 -72
- veadk/memory/short_term_memory_backends/base_backend.py +31 -0
- veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
- veadk/runner.py +12 -19
- veadk/tools/builtin_tools/generate_image.py +355 -0
- veadk/tools/builtin_tools/image_edit.py +56 -16
- veadk/tools/builtin_tools/image_generate.py +51 -15
- veadk/tools/builtin_tools/video_generate.py +41 -41
- veadk/tools/builtin_tools/web_scraper.py +1 -1
- veadk/tools/builtin_tools/web_search.py +7 -7
- veadk/tools/load_knowledgebase_tool.py +2 -8
- veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +21 -3
- veadk/tracing/telemetry/exporters/apmplus_exporter.py +24 -6
- veadk/tracing/telemetry/exporters/cozeloop_exporter.py +2 -0
- veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
- veadk/tracing/telemetry/exporters/tls_exporter.py +2 -0
- veadk/tracing/telemetry/opentelemetry_tracer.py +13 -10
- veadk/tracing/telemetry/telemetry.py +66 -63
- veadk/utils/misc.py +15 -0
- veadk/version.py +1 -1
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/METADATA +28 -5
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/RECORD +65 -56
- veadk/database/database_adapter.py +0 -533
- veadk/database/database_factory.py +0 -80
- veadk/database/kv/redis_database.py +0 -159
- veadk/database/local_database.py +0 -62
- veadk/database/relational/mysql_database.py +0 -173
- veadk/database/vector/opensearch_vector_database.py +0 -263
- veadk/database/vector/type.py +0 -50
- veadk/database/viking/__init__.py +0 -13
- veadk/database/viking/viking_database.py +0 -638
- veadk/database/viking/viking_memory_db.py +0 -525
- /veadk/{database/kv → knowledgebase/backends}/__init__.py +0 -0
- /veadk/{database/relational → memory/long_term_memory_backends}/__init__.py +0 -0
- /veadk/{database/vector → memory/short_term_memory_backends}/__init__.py +0 -0
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/WHEEL +0 -0
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/entry_points.txt +0 -0
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import json
|
|
17
|
+
import re
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Literal
|
|
20
|
+
|
|
21
|
+
import requests
|
|
22
|
+
from pydantic import Field
|
|
23
|
+
from typing_extensions import override
|
|
24
|
+
|
|
25
|
+
import veadk.config # noqa E401
|
|
26
|
+
from veadk.config import getenv
|
|
27
|
+
from veadk.configs.database_configs import NormalTOSConfig, TOSConfig
|
|
28
|
+
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
|
|
29
|
+
from veadk.knowledgebase.backends.utils import build_vikingdb_knowledgebase_request
|
|
30
|
+
from veadk.knowledgebase.entry import KnowledgebaseEntry
|
|
31
|
+
from veadk.utils.logger import get_logger
|
|
32
|
+
from veadk.utils.misc import formatted_timestamp
|
|
33
|
+
|
|
34
|
+
try:
    from veadk.integrations.ve_tos.ve_tos import VeTOS
except ImportError as e:
    # Re-raise with an installation hint, but chain the original error so the
    # traceback still shows which module actually failed to import.
    raise ImportError(
        "Please install VeADK extensions\npip install veadk-python[extensions]"
    ) from e
|
|
40
|
+
|
|
41
|
+
logger = get_logger(__name__)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _read_file_to_bytes(file_path: str) -> tuple[bytes, str]:
|
|
45
|
+
"""Read file content to bytes, and file name"""
|
|
46
|
+
with open(file_path, "rb") as f:
|
|
47
|
+
file_content = f.read()
|
|
48
|
+
file_name = file_path.split("/")[-1]
|
|
49
|
+
return file_content, file_name
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_files_in_directory(directory: str) -> list[str]:
    """List the regular files directly inside ``directory`` (non-recursive).

    Args:
        directory: Path of the directory to scan.

    Returns:
        The paths of the files as strings, sorted so that the order is
        deterministic (``Path.iterdir`` yields entries in arbitrary order).

    Raises:
        ValueError: If ``directory`` is not an existing directory.
    """
    dir_path = Path(directory)
    if not dir_path.is_dir():
        raise ValueError(f"The directory does not exist: {directory}")
    return sorted(str(entry) for entry in dir_path.iterdir() if entry.is_file())
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
    """Knowledgebase backend that talks to the VikingDB knowledgebase HTTP API.

    Content is uploaded to TOS first and then registered with the collection
    named by ``self.index`` (not declared in this class; presumably provided
    by the base backend).
    """

    volcengine_access_key: str = Field(
        default_factory=lambda: getenv("VOLCENGINE_ACCESS_KEY")
    )

    volcengine_secret_key: str = Field(
        default_factory=lambda: getenv("VOLCENGINE_SECRET_KEY")
    )

    volcengine_project: str = "default"
    """VikingDB knowledgebase project in Volcengine console platform. Default by `default`"""

    region: str = "cn-beijing"
    """VikingDB knowledgebase region"""

    tos_config: TOSConfig | NormalTOSConfig = Field(default_factory=TOSConfig)
    """TOS config, used to upload files to TOS"""

    def precheck_index_naming(self):
        """Validate ``self.index`` against the collection naming rules.

        Raises:
            ValueError: If the index is not a string of 1-128 characters that
                starts with an English letter and contains only letters,
                digits and underscores.
        """
        if not (
            isinstance(self.index, str)
            and 0 < len(self.index) <= 128
            and re.fullmatch(r"^[a-zA-Z][a-zA-Z0-9_]*$", self.index)
        ):
            raise ValueError(
                "The index name does not conform to the rules: "
                "it must start with an English letter, contain only letters, numbers, and underscores, and have a length of 1-128."
            )

    def model_post_init(self, __context: Any) -> None:
        """Validate the index name, probe the collection and build the TOS client."""
        self.precheck_index_naming()

        # Only warn when the collection is missing; it is NOT created here.
        # Callers can create it explicitly with `create_collection()`.
        if not self.collection_status()["existed"]:
            logger.warning(
                f"VikingDB knowledgebase collection {self.index} does not exist, please create it first..."
            )

        self._tos_client = VeTOS(
            ak=self.volcengine_access_key,
            sk=self.volcengine_secret_key,
            region=self.tos_config.region,
            bucket_name=self.tos_config.bucket,
        )

    @override
    def add_from_directory(
        self,
        directory: str,
        tos_bucket_name: str | None = None,
        tos_bucket_path: str = "knowledgebase",
        metadata: dict | None = None,
        **kwargs,
    ) -> bool:
        """Add knowledge from a directory to the knowledgebase.

        Only the files directly inside ``directory`` are added.

        Args:
            directory (str): The directory to add to knowledgebase.
            tos_bucket_name (str | None, optional): The bucket name of TOS. Defaults to None.
            tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
            metadata (dict | None, optional): The metadata of the files. Defaults to None.
            **kwargs: Additional keyword arguments.

        Returns:
            bool: True if every file was added successfully, False otherwise.

        Raises:
            ValueError: If ``directory`` is not an existing directory.
        """
        files = get_files_in_directory(directory=directory)
        return self.add_from_files(
            files=files,
            tos_bucket_name=tos_bucket_name,
            tos_bucket_path=tos_bucket_path,
            metadata=metadata,
            **kwargs,
        )

    @override
    def add_from_files(
        self,
        files: list[str],
        tos_bucket_name: str | None = None,
        tos_bucket_path: str = "knowledgebase",
        metadata: dict | None = None,
        **kwargs,
    ) -> bool:
        """Add knowledge from a list of files to the knowledgebase.

        Args:
            files (list[str]): The files to add to knowledgebase.
            tos_bucket_name (str | None, optional): The bucket name of TOS. Defaults to None.
            tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
            metadata (dict | None, optional): The metadata of the files. Defaults to None.
            **kwargs: Additional keyword arguments.

        Returns:
            bool: True if every file was added successfully, False otherwise.
        """
        tos_bucket_name = tos_bucket_name or self.tos_config.bucket
        all_added = True
        for _file in files:
            content, file_name = _read_file_to_bytes(_file)
            added = self._upload_and_add_doc(
                content,
                tos_bucket_name=tos_bucket_name,
                object_key=f"{tos_bucket_path}/{file_name}",
                metadata=metadata,
            )
            # Keep processing the remaining files, but remember the failure.
            all_added = all_added and added
        return all_added

    @override
    def add_from_text(
        self,
        text: str | list[str],
        tos_bucket_name: str | None = None,
        tos_bucket_path: str = "knowledgebase",
        metadata: dict | None = None,
        **kwargs,
    ) -> bool:
        """Add knowledge from text to the knowledgebase.

        Args:
            text (str | list[str]): The text to add to knowledgebase.
            tos_bucket_name (str | None, optional): The bucket name of TOS. Defaults to None.
            tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
            metadata (dict | None, optional): The metadata of the texts. Defaults to None.
            **kwargs: Optional ``tos_object_keys`` (one object key per item
                when ``text`` is a list) or ``object_key`` (when ``text`` is a
                single string) to override the generated TOS object keys.

        Returns:
            bool: True if every text was added successfully, False otherwise.

        Raises:
            ValueError: If ``text`` is neither ``str`` nor ``list``, or if
                ``tos_object_keys`` does not have one key per text.
        """
        tos_bucket_name = tos_bucket_name or self.tos_config.bucket

        if isinstance(text, list):
            object_keys = kwargs.get("tos_object_keys")
            if object_keys is None:
                object_keys = [
                    f"{tos_bucket_path}/{formatted_timestamp()}-{i}.txt"
                    for i, _ in enumerate(text)
                ]
            elif len(object_keys) != len(text):
                # zip() would silently drop the unmatched texts otherwise.
                raise ValueError(
                    "tos_object_keys must contain exactly one object key per text"
                )

            all_added = True
            for _text, _object_key in zip(text, object_keys):
                added = self._upload_and_add_doc(
                    _text.encode("utf-8"),
                    tos_bucket_name=tos_bucket_name,
                    object_key=_object_key,
                    metadata=metadata,
                )
                all_added = all_added and added
            return all_added

        if isinstance(text, str):
            # NOTE(review): this default key does not use `tos_bucket_path`,
            # unlike the list branch; kept as-is for backward compatibility.
            object_key = kwargs.get(
                "object_key", f"veadk/knowledgebase/{formatted_timestamp()}.txt"
            )
            return self._upload_and_add_doc(
                text.encode("utf-8"),
                tos_bucket_name=tos_bucket_name,
                object_key=object_key,
                metadata=metadata,
            )

        raise ValueError("text must be str or list[str]")

    def add_from_bytes(
        self,
        content: bytes,
        file_name: str,
        tos_bucket_name: str | None = None,
        tos_bucket_path: str = "knowledgebase",
        metadata: dict | None = None,
        **kwargs,
    ) -> bool:
        """Add knowledge from bytes to the knowledgebase.

        Args:
            content (bytes): The content to add to knowledgebase.
            file_name (str): The file name of the content.
            tos_bucket_name (str | None, optional): The bucket name of TOS. Defaults to None.
            tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
            metadata (dict | None, optional): The metadata of the files. Defaults to None.
            **kwargs: Additional keyword arguments.

        Returns:
            bool: True if successful, False otherwise.
        """
        tos_bucket_name = tos_bucket_name or self.tos_config.bucket
        return self._upload_and_add_doc(
            content,
            tos_bucket_name=tos_bucket_name,
            object_key=f"{tos_bucket_path}/{file_name}",
            metadata=metadata,
        )

    @override
    def search(
        self,
        query: str,
        top_k: int = 5,
        metadata: dict | None = None,
        rerank: bool = True,
    ) -> list:
        """Search the collection and return the matching knowledgebase entries.

        Args:
            query (str): The query text.
            top_k (int): Maximum number of results requested. Defaults to 5.
            metadata (dict | None, optional): Field/value pairs that the
                documents must match. Defaults to None.
            rerank (bool): Whether to ask the service to rerank the results.

        Returns:
            list: The entries built from the search results.

        Raises:
            ValueError: If the service reports a non-zero code.
        """
        return self._search_knowledge(
            query=query, top_k=top_k, metadata=metadata, rerank=rerank
        )

    def delete_collection(self) -> bool:
        """Delete the collection named by ``self.index``.

        Returns:
            bool: True if the service reports success, False otherwise.
        """
        DELETE_COLLECTION_PATH = "/api/knowledge/collection/delete"

        response = self._do_request(
            body={
                "name": self.index,
                "project": self.volcengine_project,
            },
            path=DELETE_COLLECTION_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            logger.error(f"Error during collection deletion: {response}")
            return False
        return True

    def delete_doc_by_id(self, id: str) -> bool:
        """Delete one document from the collection.

        Args:
            id (str): The document id to delete.

        Returns:
            bool: True if the service reports success, False otherwise.
        """
        DELETE_DOC_PATH = "/api/knowledge/doc/delete"
        response = self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "doc_id": id,
            },
            path=DELETE_DOC_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            # Log the failure like delete_collection does, so it is not silent.
            logger.error(f"Error during document deletion: {response}")
            return False
        return True

    def list_docs(self, offset: int = 0, limit: int = -1):
        """List documents in collection.

        Args:
            offset (int): The offset of the first document to return.
            limit (int): The maximum number of documents to return. -1 means return all documents but max is 100.

        Returns:
            The ``doc_list`` from the response, or an empty list when it is
            missing or empty.

        Raises:
            ValueError: If the service reports a non-zero code.
        """
        LIST_DOCS_PATH = "/api/knowledge/doc/list"
        response = self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "offset": offset,
                "limit": limit,
            },
            path=LIST_DOCS_PATH,
            method="POST",
        )
        if response.get("code") != 0:
            raise ValueError(f"Error during list documents: {response.get('code')}")
        return response["data"].get("doc_list") or []

    def list_chunks(self, offset: int = 0, limit: int = -1):
        """List chunks in collection.

        Args:
            offset (int): The offset of the first chunk to return.
            limit (int): The maximum number of chunks to return. -1 means return all chunks but max is 100.

        Returns:
            A list of dicts with ``id``, ``content`` and ``metadata`` keys,
            or an empty list when the response has no points.

        Raises:
            ValueError: If the service reports a non-zero code.
        """
        LIST_CHUNKS_PATH = "/api/knowledge/point/list"
        response = self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "offset": offset,
                "limit": limit,
            },
            path=LIST_CHUNKS_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            raise ValueError(f"Error during list chunks: {response}")

        points = response["data"].get("point_list") or []
        return [
            {
                "id": point["point_id"],
                "content": point["content"],
                "metadata": point["doc_info"],
            }
            for point in points
        ]

    def collection_status(self):
        """Report whether the collection exists and the status of its index.

        Returns:
            dict: ``{"existed": True, "status": <status>}`` when the service
            returns code 0, or ``{"existed": False, "status": None}`` when it
            returns code 1000005.

        Raises:
            ValueError: For any other response code.
        """
        COLLECTION_INFO_PATH = "/api/knowledge/collection/info"
        response = self._do_request(
            body={
                "name": self.index,
                "project": self.volcengine_project,
            },
            path=COLLECTION_INFO_PATH,
            method="POST",
        )
        code = response["code"]
        if code == 0:
            # Status of the first index of the first pipeline.
            status = response["data"]["pipeline_list"][0]["index_list"][0]["status"]
            return {
                "existed": True,
                "status": status,
            }
        if code == 1000005:
            # This code is treated as "collection does not exist".
            return {
                "existed": False,
                "status": None,
            }
        raise ValueError(f"Error during collection status: {response}")

    def create_collection(self) -> None:
        """Create the collection named by ``self.index``.

        Raises:
            ValueError: If the service reports a non-zero code.
        """
        CREATE_COLLECTION_PATH = "/api/knowledge/collection/create"

        response = self._do_request(
            body={
                "name": self.index,
                "project": self.volcengine_project,
                "description": "Created by Volcengine Agent Development Kit (VeADK).",
            },
            path=CREATE_COLLECTION_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            raise ValueError(
                f"Error during collection creation: {response.get('code')}"
            )

    def _upload_and_add_doc(
        self,
        content: bytes,
        tos_bucket_name: str,
        object_key: str,
        metadata: dict | None = None,
    ) -> bool:
        """Upload ``content`` to TOS and register it; return True on code 0."""
        tos_url = self._upload_bytes_to_tos(
            content,
            tos_bucket_name=tos_bucket_name,
            object_key=object_key,
            metadata=metadata,
        )
        response = self._add_doc(tos_url=tos_url)
        if response.get("code") != 0:
            logger.error(
                f"Error during adding document {tos_url} to knowledgebase: {response}"
            )
            return False
        return True

    def _upload_bytes_to_tos(
        self,
        content: bytes,
        tos_bucket_name: str,
        object_key: str,
        metadata: dict | None = None,
    ) -> str:
        """Upload ``content`` to TOS and wait until the upload has finished.

        Args:
            content (bytes): The data to upload.
            tos_bucket_name (str): The bucket to upload into.
            object_key (str): The object key inside the bucket.
            metadata (dict | None, optional): Metadata stored on the TOS object.

        Returns:
            str: ``"<bucket>/<object_key>"``, the path handed to `_add_doc`.
        """
        # Function-scope import: only needed when called from inside a running loop.
        import concurrent.futures

        # Here, we set the metadata via the TOS object, ref: https://www.volcengine.com/docs/84313/1254624
        self._tos_client.bucket_name = tos_bucket_name
        coro = self._tos_client.upload(
            object_key=object_key,
            bucket_name=tos_bucket_name,
            data=content,
            metadata=metadata,
        )
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop is running in this thread: drive the coroutine here.
            asyncio.run(coro)
        else:
            # A loop is already running in this thread, so it cannot be used to
            # block on the coroutine. Scheduling it with ensure_future() would
            # return before the upload finished, so run it to completion on a
            # worker thread with its own loop instead.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                executor.submit(asyncio.run, coro).result()
        return f"{self._tos_client.bucket_name}/{object_key}"

    def _add_doc(self, tos_url: str) -> Any:
        """Register the TOS object at ``tos_url`` as a document of the collection.

        Returns:
            The decoded JSON response of the service.
        """
        ADD_DOC_PATH = "/api/knowledge/doc/add"

        return self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "add_type": "tos",
                "tos_path": tos_url,
            },
            path=ADD_DOC_PATH,
            method="POST",
        )

    def _search_knowledge(
        self,
        query: str,
        top_k: int = 5,
        metadata: dict | None = None,
        rerank: bool = True,
        chunk_diffusion_count: int | None = 3,
    ) -> list[KnowledgebaseEntry]:
        """Call the search endpoint and convert the results into entries.

        Args:
            query (str): The query text.
            top_k (int): Sent as the request ``limit``.
            metadata (dict | None, optional): Each item becomes a ``must``
                condition of the document filter (keys and values are
                stringified).
            rerank (bool): Sent as the rerank switch of the post-processing.
            chunk_diffusion_count (int | None): Sent unchanged in the
                post-processing options.

        Returns:
            list[KnowledgebaseEntry]: One entry per item of ``result_list``.

        Raises:
            ValueError: If the service reports a non-zero code.
        """
        SEARCH_KNOWLEDGE_PATH = "/api/knowledge/collection/search_knowledge"

        query_param = (
            {
                "doc_filter": {
                    "op": "and",
                    "conds": [
                        {"op": "must", "field": str(k), "conds": [str(v)]}
                        for k, v in metadata.items()
                    ],
                }
            }
            if metadata
            else None
        )

        post_processing = {
            # The key was previously misspelled "rerank_swich", so the
            # `rerank` flag never took effect.
            "rerank_switch": rerank,
            "chunk_diffusion_count": chunk_diffusion_count,
        }

        response = self._do_request(
            body={
                "name": self.index,
                "project": self.volcengine_project,
                "query": query,
                "limit": top_k,
                "query_param": query_param,
                "post_processing": post_processing,
            },
            path=SEARCH_KNOWLEDGE_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            raise ValueError(
                f"Error during knowledge search: {response.get('code')}, message: {response.get('message')}"
            )

        entries = []
        for result in response.get("data", {}).get("result_list", []):
            # `doc_meta` is a JSON-encoded list of field_name/field_value items.
            doc_meta_raw_str = result.get("doc_info", {}).get("doc_meta")
            doc_meta_list = json.loads(doc_meta_raw_str) if doc_meta_raw_str else []
            entry_metadata = {
                meta["field_name"]: meta["field_value"] for meta in doc_meta_list
            }
            entries.append(
                KnowledgebaseEntry(
                    content=result.get("content", ""), metadata=entry_metadata
                )
            )

        return entries

    def _do_request(
        self,
        body: dict,
        path: str,
        method: Literal["GET", "POST", "PUT", "DELETE"] = "POST",
    ) -> dict:
        """Send a signed request to the knowledgebase API and decode the reply.

        Args:
            body (dict): The request payload.
            path (str): The API path, appended to the base URL.
            method: The HTTP method. Defaults to "POST".

        Returns:
            dict: The decoded JSON body of the response (also returned for
            non-2xx responses, after logging them).
        """
        # NOTE(review): the host is fixed to cn-beijing; `self.region` is not
        # used here. Confirm whether other regions need a different host.
        VIKINGDB_KNOWLEDGEBASE_BASE_URL = "api-knowledgebase.mlp.cn-beijing.volces.com"

        request = build_vikingdb_knowledgebase_request(
            path=path,
            volcengine_access_key=self.volcengine_access_key,
            volcengine_secret_key=self.volcengine_secret_key,
            method=method,
            data=body,
        )
        response = requests.request(
            method=method,
            url=f"https://{VIKINGDB_KNOWLEDGEBASE_BASE_URL}{path}",
            headers=request.headers,
            data=request.body,
        )
        if not response.ok:
            # Log the raw text: an error body is not guaranteed to be JSON, and
            # decoding it here would raise before anything was logged.
            logger.error(
                f"VikingDBKnowledgeBackend error during request: {response.text}"
            )
        return response.json()
|
|
@@ -12,6 +12,14 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from
|
|
15
|
+
from pydantic import BaseModel
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
|
|
18
|
+
class KnowledgebaseEntry(BaseModel):
    """Represents a single entry in the knowledgebase."""

    # Text body of the entry; this field is required.
    content: str

    # Extra key/value information attached to the entry; None when absent.
    metadata: dict | None = None
|