veadk-python 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of veadk-python might be problematic. Click here for more details.

Files changed (49) hide show
  1. veadk/a2a/remote_ve_agent.py +63 -6
  2. veadk/agent.py +6 -0
  3. veadk/agent_builder.py +2 -3
  4. veadk/cli/cli.py +2 -0
  5. veadk/cli/cli_kb.py +75 -0
  6. veadk/cli/cli_prompt.py +9 -2
  7. veadk/cli/cli_web.py +7 -0
  8. veadk/configs/database_configs.py +9 -0
  9. veadk/consts.py +7 -0
  10. veadk/evaluation/adk_evaluator/adk_evaluator.py +5 -2
  11. veadk/evaluation/base_evaluator.py +36 -25
  12. veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +5 -3
  13. veadk/integrations/__init__.py +13 -0
  14. veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +9 -3
  15. veadk/integrations/ve_tls/utils.py +1 -2
  16. veadk/integrations/ve_tls/ve_tls.py +9 -5
  17. veadk/integrations/ve_tos/ve_tos.py +538 -67
  18. veadk/integrations/ve_viking_db_memory/__init__.py +13 -0
  19. veadk/integrations/ve_viking_db_memory/ve_viking_db_memory.py +293 -0
  20. veadk/knowledgebase/backends/base_backend.py +4 -4
  21. veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +162 -50
  22. veadk/knowledgebase/entry.py +25 -0
  23. veadk/knowledgebase/knowledgebase.py +19 -4
  24. veadk/memory/__init__.py +1 -1
  25. veadk/memory/long_term_memory.py +45 -7
  26. veadk/memory/long_term_memory_backends/mem0_backend.py +144 -0
  27. veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +4 -8
  28. veadk/memory/short_term_memory.py +9 -3
  29. veadk/memory/short_term_memory_backends/postgresql_backend.py +3 -1
  30. veadk/runner.py +34 -26
  31. veadk/tools/builtin_tools/generate_image.py +389 -0
  32. veadk/tools/builtin_tools/image_edit.py +61 -16
  33. veadk/tools/builtin_tools/image_generate.py +56 -15
  34. veadk/tools/builtin_tools/video_generate.py +41 -41
  35. veadk/tools/builtin_tools/web_search.py +10 -3
  36. veadk/tools/load_knowledgebase_tool.py +14 -8
  37. veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +6 -1
  38. veadk/tracing/telemetry/attributes/extractors/tool_attributes_extractors.py +7 -0
  39. veadk/tracing/telemetry/exporters/apmplus_exporter.py +82 -2
  40. veadk/tracing/telemetry/exporters/inmemory_exporter.py +8 -2
  41. veadk/tracing/telemetry/opentelemetry_tracer.py +8 -2
  42. veadk/tracing/telemetry/telemetry.py +41 -5
  43. veadk/version.py +1 -1
  44. {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/METADATA +15 -4
  45. {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/RECORD +49 -42
  46. {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/WHEEL +0 -0
  47. {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/entry_points.txt +0 -0
  48. {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/licenses/LICENSE +0 -0
  49. {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,13 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,293 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import threading
17
+
18
+ from volcengine.ApiInfo import ApiInfo
19
+ from volcengine.auth.SignerV4 import SignerV4
20
+ from volcengine.base.Service import Service
21
+ from volcengine.Credentials import Credentials
22
+ from volcengine.ServiceInfo import ServiceInfo
23
+
24
+
25
class VikingDBMemoryException(Exception):
    """Error raised by the VikingDB memory client.

    The constructor arguments are kept on the instance as ``code`` and
    ``request_id``; ``message`` is stored with the code and request id
    appended, and that combined text is what ``str()`` returns.
    """

    def __init__(self, code, request_id, message=None):
        self.code, self.request_id = code, request_id
        # Combined text; a missing message is rendered as the string "None".
        self.message = f"{message}, code:{code},request_id:{request_id}"

    def __str__(self):
        return self.message
35
+
36
+
37
class VikingDBMemoryClient(Service):
    """Signed HTTP client for the VikingDB memory API, shared as one instance.

    ``__new__`` always hands back the same object, but Python still runs
    ``__init__`` on it for every ``VikingDBMemoryClient(...)`` call, so the
    most recent constructor arguments reconfigure the shared instance.
    """

    _instance_lock = threading.Lock()

    # Fallbacks used when a failure cannot be tied to a server-provided code
    # or request id.
    _FALLBACK_CODE = 1000028
    _FALLBACK_REQUEST_ID = "missed"

    # (api name, HTTP method, path) of every endpoint this client can call.
    _ENDPOINTS = (
        ("CreateCollection", "POST", "/api/memory/collection/create"),
        ("GetCollection", "POST", "/api/memory/collection/info"),
        ("DropCollection", "POST", "/api/memory/collection/delete"),
        ("UpdateCollection", "POST", "/api/memory/collection/update"),
        ("SearchMemory", "POST", "/api/memory/search"),
        ("AddMessages", "POST", "/api/memory/messages/add"),
        ("Ping", "GET", "/api/memory/ping"),
    )

    def __new__(cls, *args, **kwargs):
        # Check, take the lock, then check again so that concurrent first
        # calls end up creating a single instance.
        if not hasattr(VikingDBMemoryClient, "_instance"):
            with VikingDBMemoryClient._instance_lock:
                if not hasattr(VikingDBMemoryClient, "_instance"):
                    VikingDBMemoryClient._instance = object.__new__(cls)
        return VikingDBMemoryClient._instance

    def __init__(
        self,
        host="api-knowledgebase.mlp.cn-beijing.volces.com",
        region="cn-beijing",
        ak="",
        sk="",
        sts_token="",
        scheme="http",
        connection_timeout=30,
        socket_timeout=30,
    ):
        """Configure the client and verify the endpoint with a ``Ping`` call.

        Args:
            host: API host; also sent as the ``Host`` header.
            region: Region placed in the signing credentials.
            ak: Access key; only applied when non-empty.
            sk: Secret key; only applied when non-empty.
            sts_token: Session token; only applied when non-empty.
            scheme: URL scheme handed to ``ServiceInfo``.
            connection_timeout: Connect timeout handed to ``ServiceInfo``.
            socket_timeout: Read timeout handed to ``ServiceInfo``.

        Raises:
            VikingDBMemoryException: If the ``Ping`` request fails for any
                reason (code 1000028).
        """
        self.service_info = VikingDBMemoryClient.get_service_info(
            host, region, scheme, connection_timeout, socket_timeout
        )
        self.api_info = VikingDBMemoryClient.get_api_info()
        super().__init__(self.service_info, self.api_info)
        if ak:
            self.set_ak(ak)
        if sk:
            self.set_sk(sk)
        if sts_token:
            self.set_session_token(session_token=sts_token)
        try:
            self.get_body("Ping", {}, json.dumps({}))
        except Exception as e:
            raise VikingDBMemoryException(
                1000028, "missed", "host or region is incorrect: {}".format(str(e))
            ) from None

    def setHeader(self, header):
        """Rebuild the endpoint table with ``header`` merged into every endpoint.

        The table is regenerated from ``get_api_info()`` first, so headers set
        by an earlier ``setHeader`` call are not carried over.
        """
        api_info = VikingDBMemoryClient.get_api_info()
        for key in api_info:
            for item in header:
                api_info[key].header[item] = header[item]
        self.api_info = api_info

    @staticmethod
    def get_service_info(host, region, scheme, connection_timeout, socket_timeout):
        """Build the ``ServiceInfo`` for ``host`` with empty credentials for service ``"air"``."""
        return ServiceInfo(
            host,
            {"Host": host},
            Credentials("", "", "air", region),
            connection_timeout,
            socket_timeout,
            scheme=scheme,
        )

    @staticmethod
    def get_api_info():
        """Return a fresh ``{api name: ApiInfo}`` table.

        Every ``ApiInfo`` gets its own header dict, because ``setHeader``
        mutates those dicts in place.
        """
        return {
            name: ApiInfo(
                method,
                path,
                {},
                {},
                {"Accept": "application/json", "Content-Type": "application/json"},
            )
            for name, method, path in VikingDBMemoryClient._ENDPOINTS
        }

    def get_body(self, api, params, body):
        """Send a signed request carrying ``body`` and return the JSON reply as a string.

        The request is always issued with ``self.session.get``, whatever HTTP
        method the endpoint table declares for ``api``.

        Args:
            api: Key into ``self.api_info``.
            params: Parameters passed to ``prepare_request``.
            body: Already-serialized request body.

        Returns:
            The response JSON re-serialized with ``json.dumps``.

        Raises:
            Exception: ``"no such api"`` for an unknown ``api``; otherwise the
                UTF-8 encoded response text when the status code is not 200.
        """
        if api not in self.api_info:
            raise Exception("no such api")
        api_info = self.api_info[api]
        r = self.prepare_request(api_info, params)
        r.headers["Content-Type"] = "application/json"
        r.headers["Traffic-Source"] = "SDK"
        r.body = body

        SignerV4.sign(r, self.service_info.credentials)

        url = r.build()
        resp = self.session.get(
            url,
            headers=r.headers,
            data=r.body,
            timeout=(
                self.service_info.connection_timeout,
                self.service_info.socket_timeout,
            ),
        )
        if resp.status_code == 200:
            return json.dumps(resp.json())
        raise Exception(resp.text.encode("utf-8"))

    @staticmethod
    def _raise_memory_exception(error):
        """Re-raise ``error`` as a ``VikingDBMemoryException``.

        Must be called from inside the ``except`` block handling ``error``.
        ``error.args[0]`` is expected to be the UTF-8 encoded JSON error body;
        when it cannot be decoded or parsed, a generic exception with code
        1000028 is raised instead.
        """
        try:
            res_json = json.loads(error.args[0].decode("utf-8"))
        except Exception as parse_error:
            raise VikingDBMemoryException(
                1000028, "missed", "json load res error, res:{}".format(str(parse_error))
            ) from None
        code = res_json.get("code", 1000028)
        request_id = res_json.get("request_id", 1000028)
        message = res_json.get("message", None)
        raise VikingDBMemoryException(code, request_id, message)

    @staticmethod
    def _require_non_empty(res):
        """Return ``res`` unless it is the empty string, which is reported as an error."""
        if res == "":
            raise VikingDBMemoryException(
                1000028,
                "missed",
                "empty response due to unknown error, please contact customer service",
            )
        return res

    def get_body_exception(self, api, params, body):
        """Like ``get_body``, but failures surface as ``VikingDBMemoryException``."""
        try:
            res = self.get_body(api, params, body)
        except Exception as e:
            self._raise_memory_exception(e)
        return self._require_non_empty(res)

    def get_exception(self, api, params):
        """Call ``self.get(api, params)``; failures surface as ``VikingDBMemoryException``."""
        try:
            res = self.get(api, params)
        except Exception as e:
            self._raise_memory_exception(e)
        return self._require_non_empty(res)

    # The empty tuple defaults below are immutable stand-ins for empty lists;
    # ``json.dumps`` serializes both as ``[]``, so the request body is the same.
    def create_collection(
        self,
        collection_name,
        description="",
        custom_event_type_schemas=(),
        custom_entity_type_schemas=(),
        builtin_event_types=(),
        builtin_entity_types=(),
    ):
        """Send a ``CreateCollection`` request and return the decoded JSON reply."""
        params = {
            "CollectionName": collection_name,
            "Description": description,
            "CustomEventTypeSchemas": custom_event_type_schemas,
            "CustomEntityTypeSchemas": custom_entity_type_schemas,
            "BuiltinEventTypes": builtin_event_types,
            "BuiltinEntityTypes": builtin_entity_types,
        }
        res = self.json("CreateCollection", {}, json.dumps(params))
        return json.loads(res)

    def get_collection(self, collection_name):
        """Send a ``GetCollection`` request and return the decoded JSON reply."""
        params = {"CollectionName": collection_name}
        res = self.json("GetCollection", {}, json.dumps(params))
        return json.loads(res)

    def drop_collection(self, collection_name):
        """Send a ``DropCollection`` request and return the decoded JSON reply."""
        params = {"CollectionName": collection_name}
        res = self.json("DropCollection", {}, json.dumps(params))
        return json.loads(res)

    def update_collection(
        self,
        collection_name,
        custom_event_type_schemas=(),
        custom_entity_type_schemas=(),
        builtin_event_types=(),
        builtin_entity_types=(),
    ):
        """Send an ``UpdateCollection`` request and return the decoded JSON reply."""
        params = {
            "CollectionName": collection_name,
            "CustomEventTypeSchemas": custom_event_type_schemas,
            "CustomEntityTypeSchemas": custom_entity_type_schemas,
            "BuiltinEventTypes": builtin_event_types,
            "BuiltinEntityTypes": builtin_entity_types,
        }
        res = self.json("UpdateCollection", {}, json.dumps(params))
        return json.loads(res)

    def search_memory(self, collection_name, query, filter, limit=10):
        """Send a ``SearchMemory`` request and return the decoded JSON reply.

        ``query`` is only included in the request body when it is truthy.
        """
        params = {
            "collection_name": collection_name,
            "limit": limit,
            "filter": filter,
        }
        if query:
            params["query"] = query
        res = self.json("SearchMemory", {}, json.dumps(params))
        return json.loads(res)

    def add_messages(
        self, collection_name, session_id, messages, metadata, entities=None
    ):
        """Send an ``AddMessages`` request and return the decoded JSON reply.

        ``entities`` is only included in the request body when it is not None.
        """
        params = {
            "collection_name": collection_name,
            "session_id": session_id,
            "messages": messages,
            "metadata": metadata,
        }
        if entities is not None:
            params["entities"] = entities
        res = self.json("AddMessages", {}, json.dumps(params))
        return json.loads(res)
@@ -29,19 +29,19 @@ class BaseKnowledgebaseBackend(ABC, BaseModel):
29
29
  """
30
30
 
31
31
  @abstractmethod
32
- def add_from_directory(self, directory: str, **kwargs) -> bool:
32
+ def add_from_directory(self, directory: str, *args, **kwargs) -> bool:
33
33
  """Add knowledge from a directory to the knowledgebase; extra arguments are backend-specific."""
34
34
 
35
35
  @abstractmethod
36
- def add_from_files(self, files: list[str], **kwargs) -> bool:
36
+ def add_from_files(self, files: list[str], *args, **kwargs) -> bool:
37
37
  """Add knowledge from a list of files to the knowledgebase; extra arguments are backend-specific."""
38
38
 
39
39
  @abstractmethod
40
- def add_from_text(self, text: str | list[str], **kwargs) -> bool:
40
+ def add_from_text(self, text: str | list[str], *args, **kwargs) -> bool:
41
41
  """Add knowledge from one text or a list of texts to the knowledgebase; extra arguments are backend-specific."""
42
42
 
43
43
  @abstractmethod
44
- def search(self, **kwargs) -> list:
44
+ def search(self, *args, **kwargs) -> list:
45
45
  """Search the knowledgebase and return the results as a list; accepted arguments are backend-specific."""
46
46
 
47
47
  # Optional methods for future use:
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import asyncio
16
+ import json
16
17
  import re
17
18
  from pathlib import Path
18
19
  from typing import Any, Literal
@@ -24,9 +25,9 @@ from typing_extensions import override
24
25
  import veadk.config # noqa E401
25
26
  from veadk.config import getenv
26
27
  from veadk.configs.database_configs import NormalTOSConfig, TOSConfig
27
- from veadk.consts import DEFAULT_TOS_BUCKET_NAME
28
28
  from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
29
29
  from veadk.knowledgebase.backends.utils import build_vikingdb_knowledgebase_request
30
+ from veadk.knowledgebase.entry import KnowledgebaseEntry
30
31
  from veadk.utils.logger import get_logger
31
32
  from veadk.utils.misc import formatted_timestamp
32
33
 
@@ -48,13 +49,6 @@ def _read_file_to_bytes(file_path: str) -> tuple[bytes, str]:
48
49
  return file_content, file_name
49
50
 
50
51
 
51
- def _extract_tos_attributes(**kwargs) -> tuple[str, str]:
52
- """Extract TOS attributes from kwargs"""
53
- tos_bucket_name = kwargs.get("tos_bucket_name", DEFAULT_TOS_BUCKET_NAME)
54
- tos_bucket_path = kwargs.get("tos_bucket_path", "knowledgebase")
55
- return tos_bucket_name, tos_bucket_path
56
-
57
-
58
52
  def get_files_in_directory(directory: str):
59
53
  dir_path = Path(directory)
60
54
  if not dir_path.is_dir():
@@ -109,15 +103,27 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
109
103
  )
110
104
 
111
105
  @override
112
- def add_from_directory(self, directory: str, **kwargs) -> bool:
113
- """
106
+ def add_from_directory(
107
+ self,
108
+ directory: str,
109
+ tos_bucket_name: str | None = None,
110
+ tos_bucket_path: str = "knowledgebase",
111
+ metadata: dict | None = None,
112
+ **kwargs,
113
+ ) -> bool:
114
+ """Add knowledge from a directory to the knowledgebase.
115
+
114
116
  Args:
115
- directory: str, the directory to add to knowledgebase
116
- **kwargs:
117
- - tos_bucket_name: str, the bucket name of TOS
118
- - tos_bucket_path: str, the path of TOS bucket
117
+ directory (str): The directory to add to knowledgebase.
118
+ tos_bucket_name (str | None, optional): The bucket name of TOS. Defaults to None.
119
+ tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
120
+ metadata (dict | None, optional): The metadata of the files. Defaults to None.
121
+ **kwargs: Additional keyword arguments.
122
+
123
+ Returns:
124
+ bool: True if successful, False otherwise.
119
125
  """
120
- tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)
126
+ tos_bucket_name = tos_bucket_name or self.tos_config.bucket
121
127
  files = get_files_in_directory(directory=directory)
122
128
  for _file in files:
123
129
  content, file_name = _read_file_to_bytes(_file)
@@ -125,40 +131,64 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
125
131
  content,
126
132
  tos_bucket_name=tos_bucket_name,
127
133
  object_key=f"{tos_bucket_path}/{file_name}",
134
+ metadata=metadata,
128
135
  )
129
136
  self._add_doc(tos_url=tos_url)
130
137
  return True
131
138
 
132
139
  @override
133
- def add_from_files(self, files: list[str], **kwargs) -> bool:
134
- """
140
+ def add_from_files(
141
+ self,
142
+ files: list[str],
143
+ tos_bucket_name: str | None = None,
144
+ tos_bucket_path: str = "knowledgebase",
145
+ metadata: dict | None = None,
146
+ **kwargs,
147
+ ) -> bool:
148
+ """Add knowledge from a list of files to the knowledgebase.
149
+
135
150
  Args:
136
- files: list[str], the files to add to knowledgebase
137
- **kwargs:
138
- - tos_bucket_name: str, the bucket name of TOS
139
- - tos_bucket_path: str, the path of TOS bucket
151
+ files (list[str]): The files to add to knowledgebase.
152
+ tos_bucket_name (str | None, optional): The bucket name of TOS. When empty or None, `self.tos_config.bucket` is used.
153
+ tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
154
+ metadata (dict | None, optional): The metadata of the files, passed to every upload. Defaults to None.
155
+ **kwargs: Additional keyword arguments; not read by this method.
156
+
157
+ Returns:
158
+ bool: Always True; the response of each add-document request is not inspected.
140
159
  """
141
- tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)
160
+ tos_bucket_name = tos_bucket_name or self.tos_config.bucket
142
161
  for _file in files:
143
162
  content, file_name = _read_file_to_bytes(_file)
144
163
  tos_url = self._upload_bytes_to_tos(
145
164
  content,
146
165
  tos_bucket_name=tos_bucket_name,
147
166
  object_key=f"{tos_bucket_path}/{file_name}",
167
+ metadata=metadata,
148
168
  )
149
169
  self._add_doc(tos_url=tos_url)
150
170
  return True
151
171
 
152
172
  @override
153
- def add_from_text(self, text: str | list[str], **kwargs) -> bool:
154
- """
173
+ def add_from_text(
174
+ self,
175
+ text: str | list[str],
176
+ tos_bucket_name: str | None = None,
177
+ tos_bucket_path: str = "knowledgebase",
178
+ metadata: dict | None = None,
179
+ **kwargs,
180
+ ) -> bool:
181
+ """Add knowledge from text to the knowledgebase.
182
+
155
183
  Args:
156
- text: str or list[str], the text to add to knowledgebase
157
- **kwargs:
158
- - tos_bucket_name: str, the bucket name of TOS
159
- - tos_bucket_path: str, the path of TOS bucket
184
+ text (str | list[str]): The text to add to knowledgebase.
185
+ tos_bucket_name (str | None, optional): The bucket name of TOS. Defaults to None.
186
+ tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
187
+
188
+ Returns:
189
+ bool: True if successful, False otherwise.
160
190
  """
161
- tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)
191
+ tos_bucket_name = tos_bucket_name or self.tos_config.bucket
162
192
  if isinstance(text, list):
163
193
  object_keys = kwargs.get(
164
194
  "tos_object_keys",
@@ -170,7 +200,7 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
170
200
  for _text, _object_key in zip(text, object_keys):
171
201
  _content = _text.encode("utf-8")
172
202
  tos_url = self._upload_bytes_to_tos(
173
- _content, tos_bucket_name, _object_key
203
+ _content, tos_bucket_name, _object_key, metadata=metadata
174
204
  )
175
205
  self._add_doc(tos_url=tos_url)
176
206
  return True
@@ -179,26 +209,42 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
179
209
  object_key = kwargs.get(
180
210
  "object_key", f"veadk/knowledgebase/{formatted_timestamp()}.txt"
181
211
  )
182
- tos_url = self._upload_bytes_to_tos(content, tos_bucket_name, object_key)
212
+ tos_url = self._upload_bytes_to_tos(
213
+ content, tos_bucket_name, object_key, metadata=metadata
214
+ )
183
215
  self._add_doc(tos_url=tos_url)
184
216
  else:
185
217
  raise ValueError("text must be str or list[str]")
186
218
  return True
187
219
 
188
- def add_from_bytes(self, content: bytes, file_name: str, **kwargs) -> bool:
189
- """
220
+ def add_from_bytes(
221
+ self,
222
+ content: bytes,
223
+ file_name: str,
224
+ tos_bucket_name: str | None = None,
225
+ tos_bucket_path: str = "knowledgebase",
226
+ metadata: dict | None = None,
227
+ **kwargs,
228
+ ) -> bool:
229
+ """Add knowledge from bytes to the knowledgebase.
230
+
190
231
  Args:
191
- content: bytes, the content to add to knowledgebase, bytes
192
- file_name: str, the file name of the content
193
- **kwargs:
194
- - tos_bucket_name: str, the bucket name of TOS
195
- - tos_bucket_path: str, the path of TOS bucket
232
+ content (bytes): The content to add to knowledgebase.
233
+ file_name (str): The file name of the content.
234
+ tos_bucket_name (str | None, optional): The bucket name of TOS. Defaults to None.
235
+ tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
236
+ metadata (dict | None, optional): The metadata of the files. Defaults to None.
237
+ **kwargs: Additional keyword arguments.
238
+
239
+ Returns:
240
+ bool: True if successful, False otherwise.
196
241
  """
197
- tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)
242
+ tos_bucket_name = tos_bucket_name or self.tos_config.bucket
198
243
  tos_url = self._upload_bytes_to_tos(
199
244
  content,
200
245
  tos_bucket_name=tos_bucket_name,
201
246
  object_key=f"{tos_bucket_path}/{file_name}",
247
+ metadata=metadata,
202
248
  )
203
249
  response = self._add_doc(tos_url=tos_url)
204
250
  if response["code"] == 0:
@@ -206,8 +252,16 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
206
252
  return False
207
253
 
208
254
  @override
209
- def search(self, query: str, top_k: int = 5) -> list:
210
- return self._search_knowledge(query=query, top_k=top_k)
255
+ def search(
256
+ self,
257
+ query: str,
258
+ top_k: int = 5,
259
+ metadata: dict | None = None,
260
+ rerank: bool = True,
261
+ ) -> list:
+ """Search the knowledgebase by delegating to `_search_knowledge`.
+
+ Args:
+ query (str): The query text.
+ top_k (int, optional): Sent as the request `limit`. Defaults to 5.
+ metadata (dict | None, optional): When non-empty, each key/value pair becomes a `must` condition of the document filter. Defaults to None.
+ rerank (bool, optional): Forwarded to the post-processing options. Defaults to True.
+
+ Returns:
+ list: The entries built by `_search_knowledge`.
+ """
+ # NOTE(review): `_search_knowledge` sends `rerank` under the key "rerank_swich", which
+ # looks like a misspelling of "rerank_switch" - confirm against the VikingDB API reference.
262
+ return self._search_knowledge(
263
+ query=query, top_k=top_k, metadata=metadata, rerank=rerank
264
+ )
211
265
 
212
266
  def delete_collection(self) -> bool:
213
267
  DELETE_COLLECTION_PATH = "/api/knowledge/collection/delete"
@@ -330,7 +384,7 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
330
384
  response = self._do_request(
331
385
  body={
332
386
  "name": self.index,
333
- "project": "default",
387
+ "project": self.volcengine_project,
334
388
  "description": "Created by Volcengine Agent Development Kit (VeADK).",
335
389
  },
336
390
  path=CREATE_COLLECTION_PATH,
@@ -343,10 +397,27 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
343
397
  )
344
398
 
345
399
  def _upload_bytes_to_tos(
346
- self, content: bytes, tos_bucket_name: str, object_key: str
400
+ self,
401
+ content: bytes,
402
+ tos_bucket_name: str,
403
+ object_key: str,
404
+ metadata: dict | None = None,
347
405
  ) -> str:
406
+ # Here, we set the metadata via the TOS object, ref: https://www.volcengine.com/docs/84313/1254624
348
407
  self._tos_client.bucket_name = tos_bucket_name
349
- asyncio.run(self._tos_client.upload(object_key=object_key, data=content))
408
+ coro = self._tos_client.upload(
409
+ object_key=object_key,
410
+ bucket_name=tos_bucket_name,
411
+ data=content,
412
+ metadata=metadata,
413
+ )
414
+ try:
415
+ loop = asyncio.get_running_loop()
416
+ loop.run_until_complete(
417
+ coro
418
+ ) if not loop.is_running() else asyncio.ensure_future(coro)
419
+ except RuntimeError:
420
+ asyncio.run(coro)
350
421
  return f"{self._tos_client.bucket_name}/{object_key}"
351
422
 
352
423
  def _add_doc(self, tos_url: str) -> Any:
@@ -355,7 +426,7 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
355
426
  response = self._do_request(
356
427
  body={
357
428
  "collection_name": self.index,
358
- "project": "default",
429
+ "project": self.volcengine_project,
359
430
  "add_type": "tos",
360
431
  "tos_path": tos_url,
361
432
  },
@@ -364,14 +435,43 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
364
435
  )
365
436
  return response
366
437
 
367
- def _search_knowledge(self, query: str, top_k: int = 5) -> list[str]:
438
+ def _search_knowledge(
439
+ self,
440
+ query: str,
441
+ top_k: int = 5,
442
+ metadata: dict | None = None,
443
+ rerank: bool = True,
444
+ chunk_diffusion_count: int | None = 3,
445
+ ) -> list[KnowledgebaseEntry]:
368
446
  SEARCH_KNOWLEDGE_PATH = "/api/knowledge/collection/search_knowledge"
369
447
 
448
+ query_param = (
449
+ {
450
+ "doc_filter": {
451
+ "op": "and",
452
+ "conds": [
453
+ {"op": "must", "field": str(k), "conds": [str(v)]}
454
+ for k, v in metadata.items()
455
+ ],
456
+ }
457
+ }
458
+ if metadata
459
+ else None
460
+ )
461
+
462
+ post_precessing = {
463
+ "rerank_swich": rerank,
464
+ "chunk_diffusion_count": chunk_diffusion_count,
465
+ }
466
+
370
467
  response = self._do_request(
371
468
  body={
372
469
  "name": self.index,
470
+ "project": self.volcengine_project,
373
471
  "query": query,
374
472
  "limit": top_k,
473
+ "query_param": query_param,
474
+ "post_processing": post_precessing,
375
475
  },
376
476
  path=SEARCH_KNOWLEDGE_PATH,
377
477
  method="POST",
@@ -382,11 +482,19 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
382
482
  f"Error during knowledge search: {response.get('code')}, message: {response.get('message')}"
383
483
  )
384
484
 
385
- search_result_list = response.get("data", {}).get("result_list", [])
485
+ entries = []
486
+ for result in response.get("data", {}).get("result_list", []):
487
+ doc_meta_raw_str = result.get("doc_info", {}).get("doc_meta")
488
+ doc_meta_list = json.loads(doc_meta_raw_str) if doc_meta_raw_str else []
489
+ metadata = {}
490
+ for meta in doc_meta_list:
491
+ metadata[meta["field_name"]] = meta["field_value"]
386
492
 
387
- return [
388
- search_result.get("content", "") for search_result in search_result_list
389
- ]
493
+ entries.append(
494
+ KnowledgebaseEntry(content=result.get("content", ""), metadata=metadata)
495
+ )
496
+
497
+ return entries
390
498
 
391
499
  def _do_request(
392
500
  self,
@@ -409,4 +517,8 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
409
517
  headers=request.headers,
410
518
  data=request.body,
411
519
  )
520
+ if not response.ok:
521
+ logger.error(
522
+ f"VikingDBKnowledgeBackend error during request: {response.json()}"
523
+ )
412
524
  return response.json()