veadk-python 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of veadk-python might be problematic. Click here for more details.
- veadk/a2a/remote_ve_agent.py +63 -6
- veadk/agent.py +6 -0
- veadk/agent_builder.py +2 -3
- veadk/cli/cli.py +2 -0
- veadk/cli/cli_kb.py +75 -0
- veadk/cli/cli_prompt.py +9 -2
- veadk/cli/cli_web.py +7 -0
- veadk/configs/database_configs.py +9 -0
- veadk/consts.py +7 -0
- veadk/evaluation/adk_evaluator/adk_evaluator.py +5 -2
- veadk/evaluation/base_evaluator.py +36 -25
- veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +5 -3
- veadk/integrations/__init__.py +13 -0
- veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +9 -3
- veadk/integrations/ve_tls/utils.py +1 -2
- veadk/integrations/ve_tls/ve_tls.py +9 -5
- veadk/integrations/ve_tos/ve_tos.py +538 -67
- veadk/integrations/ve_viking_db_memory/__init__.py +13 -0
- veadk/integrations/ve_viking_db_memory/ve_viking_db_memory.py +293 -0
- veadk/knowledgebase/backends/base_backend.py +4 -4
- veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +162 -50
- veadk/knowledgebase/entry.py +25 -0
- veadk/knowledgebase/knowledgebase.py +19 -4
- veadk/memory/__init__.py +1 -1
- veadk/memory/long_term_memory.py +45 -7
- veadk/memory/long_term_memory_backends/mem0_backend.py +144 -0
- veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +4 -8
- veadk/memory/short_term_memory.py +9 -3
- veadk/memory/short_term_memory_backends/postgresql_backend.py +3 -1
- veadk/runner.py +34 -26
- veadk/tools/builtin_tools/generate_image.py +389 -0
- veadk/tools/builtin_tools/image_edit.py +61 -16
- veadk/tools/builtin_tools/image_generate.py +56 -15
- veadk/tools/builtin_tools/video_generate.py +41 -41
- veadk/tools/builtin_tools/web_search.py +10 -3
- veadk/tools/load_knowledgebase_tool.py +14 -8
- veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +6 -1
- veadk/tracing/telemetry/attributes/extractors/tool_attributes_extractors.py +7 -0
- veadk/tracing/telemetry/exporters/apmplus_exporter.py +82 -2
- veadk/tracing/telemetry/exporters/inmemory_exporter.py +8 -2
- veadk/tracing/telemetry/opentelemetry_tracer.py +8 -2
- veadk/tracing/telemetry/telemetry.py +41 -5
- veadk/version.py +1 -1
- {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/METADATA +15 -4
- {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/RECORD +49 -42
- {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/WHEEL +0 -0
- {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/entry_points.txt +0 -0
- {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/licenses/LICENSE +0 -0
- {veadk_python-0.2.8.dist-info → veadk_python-0.2.10.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import threading
|
|
17
|
+
|
|
18
|
+
from volcengine.ApiInfo import ApiInfo
|
|
19
|
+
from volcengine.auth.SignerV4 import SignerV4
|
|
20
|
+
from volcengine.base.Service import Service
|
|
21
|
+
from volcengine.Credentials import Credentials
|
|
22
|
+
from volcengine.ServiceInfo import ServiceInfo
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class VikingDBMemoryException(Exception):
    """Error raised by the VikingDB memory client.

    The raw ``code`` and ``request_id`` are kept as attributes, and
    ``message`` holds the pre-formatted text that ``str()`` returns.
    """

    def __init__(self, code, request_id, message=None):
        """Store the identifiers and build the display text.

        Args:
            code: Error code to report.
            request_id: Identifier of the request that failed.
            message: Optional human-readable description; ``None`` is
                rendered literally as ``"None"`` in the display text.
        """
        self.code, self.request_id = code, request_id
        self.message = f"{message}, code:{self.code},request_id:{self.request_id}"

    def __str__(self):
        """Return the formatted ``message`` built in ``__init__``."""
        return self.message
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class VikingDBMemoryClient(Service):
    """Client for the VikingDB memory HTTP API, built on volcengine's ``Service``.

    ``__new__`` hands out a single shared instance for the whole process.
    ``__init__`` still runs on every ``VikingDBMemoryClient(...)`` call, so each
    call reconfigures that shared instance and pings the service again.
    """

    _instance_lock = threading.Lock()

    # (api name, HTTP method, path) of every endpoint registered by get_api_info().
    _API_ROUTES = (
        ("CreateCollection", "POST", "/api/memory/collection/create"),
        ("GetCollection", "POST", "/api/memory/collection/info"),
        ("DropCollection", "POST", "/api/memory/collection/delete"),
        ("UpdateCollection", "POST", "/api/memory/collection/update"),
        ("SearchMemory", "POST", "/api/memory/search"),
        ("AddMessages", "POST", "/api/memory/messages/add"),
        ("Ping", "GET", "/api/memory/ping"),
    )

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if not hasattr(VikingDBMemoryClient, "_instance"):
            with VikingDBMemoryClient._instance_lock:
                # Re-check under the lock: another thread may have created the
                # instance between the first check and acquiring the lock.
                if not hasattr(VikingDBMemoryClient, "_instance"):
                    VikingDBMemoryClient._instance = object.__new__(cls)
        return VikingDBMemoryClient._instance

    def __init__(
        self,
        host="api-knowledgebase.mlp.cn-beijing.volces.com",
        region="cn-beijing",
        ak="",
        sk="",
        sts_token="",
        scheme="http",
        connection_timeout=30,
        socket_timeout=30,
    ):
        """Configure the client and verify connectivity with a ``Ping`` call.

        Args:
            host: API host name.
            region: Region passed to the request credentials.
            ak: Access key; only applied when non-empty.
            sk: Secret key; only applied when non-empty.
            sts_token: Session token; only applied when non-empty.
            scheme: URL scheme used for requests.
            connection_timeout: Connect timeout handed to ``ServiceInfo``.
            socket_timeout: Read timeout handed to ``ServiceInfo``.

        Raises:
            VikingDBMemoryException: If the ``Ping`` request fails for any
                reason (code ``1000028``).
        """
        self.service_info = VikingDBMemoryClient.get_service_info(
            host, region, scheme, connection_timeout, socket_timeout
        )
        self.api_info = VikingDBMemoryClient.get_api_info()
        super(VikingDBMemoryClient, self).__init__(self.service_info, self.api_info)
        if ak:
            self.set_ak(ak)
        if sk:
            self.set_sk(sk)
        if sts_token:
            self.set_session_token(session_token=sts_token)
        try:
            self.get_body("Ping", {}, json.dumps({}))
        except Exception as e:
            raise VikingDBMemoryException(
                1000028, "missed", "host or region is incorrect: {}".format(str(e))
            ) from None

    def setHeader(self, header):
        """Rebuild the API table with ``header`` entries added to every endpoint.

        Args:
            header: Mapping of header name to value.
        """
        api_info = VikingDBMemoryClient.get_api_info()
        for key in api_info:
            for item in header:
                api_info[key].header[item] = header[item]
        self.api_info = api_info

    @staticmethod
    def get_service_info(host, region, scheme, connection_timeout, socket_timeout):
        """Build the ``ServiceInfo`` for ``host`` with empty ak/sk credentials."""
        service_info = ServiceInfo(
            host,
            {"Host": host},
            Credentials("", "", "air", region),
            connection_timeout,
            socket_timeout,
            scheme=scheme,
        )
        return service_info

    @staticmethod
    def get_api_info():
        """Return a fresh ``{api name: ApiInfo}`` table for all endpoints.

        Every ``ApiInfo`` gets its own query/form/header dicts, so mutating one
        entry's headers (see ``setHeader``) never leaks into another entry.
        """
        return {
            name: ApiInfo(
                method,
                path,
                {},
                {},
                {"Accept": "application/json", "Content-Type": "application/json"},
            )
            for name, method, path in VikingDBMemoryClient._API_ROUTES
        }

    @staticmethod
    def _list_or_empty(value):
        """Return ``value``, or a new empty list when ``value`` is ``None``."""
        return [] if value is None else value

    @staticmethod
    def _to_memory_exception(error):
        """Translate a failed request into a ``VikingDBMemoryException``.

        The first argument of ``error`` is expected to be the JSON error body,
        as ``bytes`` or ``str``.  Anything else (connection errors, empty
        ``args``, non-JSON text, non-object JSON) becomes the generic
        ``1000028`` error carrying the original failure text.
        """
        payload = error.args[0] if error.args else ""
        if isinstance(payload, (bytes, bytearray)):
            payload = payload.decode("utf-8", errors="replace")
        detail = payload if isinstance(payload, str) else str(error)
        try:
            res_json = json.loads(payload)
        except (TypeError, ValueError):
            res_json = None
        if not isinstance(res_json, dict):
            return VikingDBMemoryException(
                1000028, "missed", "json load res error, res:{}".format(detail)
            )
        return VikingDBMemoryException(
            res_json.get("code", 1000028),
            res_json.get("request_id", 1000028),
            res_json.get("message", None),
        )

    def get_body(self, api, params, body):
        """Send a signed request carrying ``body`` and return the JSON reply.

        The request is always issued through ``self.session.get``, whatever
        HTTP method the endpoint's ``ApiInfo`` declares.

        Args:
            api: Key into ``self.api_info``.
            params: Query parameters passed to ``prepare_request``.
            body: Already-serialized request body.

        Returns:
            The response JSON, re-serialized to a string.

        Raises:
            Exception: ``"no such api"`` for an unknown ``api``; otherwise the
                UTF-8 encoded response text when the status is not 200.
        """
        if api not in self.api_info:
            raise Exception("no such api")
        api_info = self.api_info[api]
        r = self.prepare_request(api_info, params)
        r.headers["Content-Type"] = "application/json"
        r.headers["Traffic-Source"] = "SDK"
        r.body = body

        SignerV4.sign(r, self.service_info.credentials)

        url = r.build()
        resp = self.session.get(
            url,
            headers=r.headers,
            data=r.body,
            timeout=(
                self.service_info.connection_timeout,
                self.service_info.socket_timeout,
            ),
        )
        if resp.status_code == 200:
            return json.dumps(resp.json())
        else:
            raise Exception(resp.text.encode("utf-8"))

    def get_body_exception(self, api, params, body):
        """Call ``get_body`` and convert any failure to ``VikingDBMemoryException``.

        Raises:
            VikingDBMemoryException: On request failure or an empty response.
        """
        try:
            res = self.get_body(api, params, body)
        except Exception as e:
            raise self._to_memory_exception(e) from None

        if res == "":
            raise VikingDBMemoryException(
                1000028,
                "missed",
                "empty response due to unknown error, please contact customer service",
            ) from None
        return res

    def get_exception(self, api, params):
        """Call ``self.get`` and convert any failure to ``VikingDBMemoryException``.

        Raises:
            VikingDBMemoryException: On request failure or an empty response.
        """
        try:
            res = self.get(api, params)
        except Exception as e:
            raise self._to_memory_exception(e) from None
        if res == "":
            raise VikingDBMemoryException(
                1000028,
                "missed",
                "empty response due to unknown error, please contact customer service",
            ) from None
        return res

    def create_collection(
        self,
        collection_name,
        description="",
        custom_event_type_schemas=None,
        custom_entity_type_schemas=None,
        builtin_event_types=None,
        builtin_entity_types=None,
    ):
        """Create a memory collection and return the decoded response.

        The four schema/type arguments default to empty lists when omitted
        or ``None``.
        """
        params = {
            "CollectionName": collection_name,
            "Description": description,
            "CustomEventTypeSchemas": self._list_or_empty(custom_event_type_schemas),
            "CustomEntityTypeSchemas": self._list_or_empty(custom_entity_type_schemas),
            "BuiltinEventTypes": self._list_or_empty(builtin_event_types),
            "BuiltinEntityTypes": self._list_or_empty(builtin_entity_types),
        }
        res = self.json("CreateCollection", {}, json.dumps(params))
        return json.loads(res)

    def get_collection(self, collection_name):
        """Fetch information about ``collection_name`` and return the decoded response."""
        params = {"CollectionName": collection_name}
        res = self.json("GetCollection", {}, json.dumps(params))
        return json.loads(res)

    def drop_collection(self, collection_name):
        """Delete ``collection_name`` and return the decoded response."""
        params = {"CollectionName": collection_name}
        res = self.json("DropCollection", {}, json.dumps(params))
        return json.loads(res)

    def update_collection(
        self,
        collection_name,
        custom_event_type_schemas=None,
        custom_entity_type_schemas=None,
        builtin_event_types=None,
        builtin_entity_types=None,
    ):
        """Update a collection's schemas/types and return the decoded response.

        The four schema/type arguments default to empty lists when omitted
        or ``None``.
        """
        params = {
            "CollectionName": collection_name,
            "CustomEventTypeSchemas": self._list_or_empty(custom_event_type_schemas),
            "CustomEntityTypeSchemas": self._list_or_empty(custom_entity_type_schemas),
            "BuiltinEventTypes": self._list_or_empty(builtin_event_types),
            "BuiltinEntityTypes": self._list_or_empty(builtin_entity_types),
        }
        res = self.json("UpdateCollection", {}, json.dumps(params))
        return json.loads(res)

    def search_memory(self, collection_name, query, filter, limit=10):
        """Search a collection and return the decoded response.

        ``query`` is only included in the request when it is truthy.
        """
        params = {
            "collection_name": collection_name,
            "limit": limit,
            "filter": filter,
        }
        if query:
            params["query"] = query
        res = self.json("SearchMemory", {}, json.dumps(params))
        return json.loads(res)

    def add_messages(
        self, collection_name, session_id, messages, metadata, entities=None
    ):
        """Add session messages to a collection and return the decoded response.

        ``entities`` is only included in the request when it is not ``None``.
        """
        params = {
            "collection_name": collection_name,
            "session_id": session_id,
            "messages": messages,
            "metadata": metadata,
        }
        if entities is not None:
            params["entities"] = entities
        res = self.json("AddMessages", {}, json.dumps(params))
        return json.loads(res)
|
|
@@ -29,19 +29,19 @@ class BaseKnowledgebaseBackend(ABC, BaseModel):
|
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
31
|
@abstractmethod
|
|
32
|
-
def add_from_directory(self, directory: str, **kwargs) -> bool:
|
|
32
|
+
def add_from_directory(self, directory: str, *args, **kwargs) -> bool:
    """Add knowledge from the files under a directory to the knowledgebase.

    Abstract: each backend supplies the implementation.

    Args:
        directory: Path of the directory to ingest.
        *args: Extra positional arguments, defined by the backend.
        **kwargs: Extra keyword arguments, defined by the backend.

    Returns:
        A success flag.
    """
|
|
34
34
|
|
|
35
35
|
@abstractmethod
|
|
36
|
-
def add_from_files(self, files: list[str], **kwargs) -> bool:
|
|
36
|
+
def add_from_files(self, files: list[str], *args, **kwargs) -> bool:
    """Add knowledge from the given files (e.g. documents) to the knowledgebase.

    Abstract: each backend supplies the implementation.

    Args:
        files: Paths of the files to ingest.
        *args: Extra positional arguments, defined by the backend.
        **kwargs: Extra keyword arguments, defined by the backend.

    Returns:
        A success flag.
    """
|
|
38
38
|
|
|
39
39
|
@abstractmethod
|
|
40
|
-
def add_from_text(self, text: str | list[str], **kwargs) -> bool:
|
|
40
|
+
def add_from_text(self, text: str | list[str], *args, **kwargs) -> bool:
    """Add knowledge from raw text to the knowledgebase.

    Abstract: each backend supplies the implementation.

    Args:
        text: A single text or a list of texts to ingest.
        *args: Extra positional arguments, defined by the backend.
        **kwargs: Extra keyword arguments, defined by the backend.

    Returns:
        A success flag.
    """
|
|
42
42
|
|
|
43
43
|
@abstractmethod
|
|
44
|
-
def search(self, **kwargs) -> list:
|
|
44
|
+
def search(self, *args, **kwargs) -> list:
    """Search the knowledgebase and return the matching entries.

    Abstract: each backend supplies the implementation and defines which
    positional and keyword arguments it accepts.

    Returns:
        A list of search results.
    """
|
|
46
46
|
|
|
47
47
|
# Optional methods for future use:
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import asyncio
|
|
16
|
+
import json
|
|
16
17
|
import re
|
|
17
18
|
from pathlib import Path
|
|
18
19
|
from typing import Any, Literal
|
|
@@ -24,9 +25,9 @@ from typing_extensions import override
|
|
|
24
25
|
import veadk.config # noqa E401
|
|
25
26
|
from veadk.config import getenv
|
|
26
27
|
from veadk.configs.database_configs import NormalTOSConfig, TOSConfig
|
|
27
|
-
from veadk.consts import DEFAULT_TOS_BUCKET_NAME
|
|
28
28
|
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
|
|
29
29
|
from veadk.knowledgebase.backends.utils import build_vikingdb_knowledgebase_request
|
|
30
|
+
from veadk.knowledgebase.entry import KnowledgebaseEntry
|
|
30
31
|
from veadk.utils.logger import get_logger
|
|
31
32
|
from veadk.utils.misc import formatted_timestamp
|
|
32
33
|
|
|
@@ -48,13 +49,6 @@ def _read_file_to_bytes(file_path: str) -> tuple[bytes, str]:
|
|
|
48
49
|
return file_content, file_name
|
|
49
50
|
|
|
50
51
|
|
|
51
|
-
def _extract_tos_attributes(**kwargs) -> tuple[str, str]:
|
|
52
|
-
"""Extract TOS attributes from kwargs"""
|
|
53
|
-
tos_bucket_name = kwargs.get("tos_bucket_name", DEFAULT_TOS_BUCKET_NAME)
|
|
54
|
-
tos_bucket_path = kwargs.get("tos_bucket_path", "knowledgebase")
|
|
55
|
-
return tos_bucket_name, tos_bucket_path
|
|
56
|
-
|
|
57
|
-
|
|
58
52
|
def get_files_in_directory(directory: str):
|
|
59
53
|
dir_path = Path(directory)
|
|
60
54
|
if not dir_path.is_dir():
|
|
@@ -109,15 +103,27 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
109
103
|
)
|
|
110
104
|
|
|
111
105
|
@override
|
|
112
|
-
def add_from_directory(
|
|
113
|
-
|
|
106
|
+
def add_from_directory(
|
|
107
|
+
self,
|
|
108
|
+
directory: str,
|
|
109
|
+
tos_bucket_name: str | None = None,
|
|
110
|
+
tos_bucket_path: str = "knowledgebase",
|
|
111
|
+
metadata: dict | None = None,
|
|
112
|
+
**kwargs,
|
|
113
|
+
) -> bool:
|
|
114
|
+
"""Add knowledge from a directory to the knowledgebase.
|
|
115
|
+
|
|
114
116
|
Args:
|
|
115
|
-
directory
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
117
|
+
directory (str): The directory to add to knowledgebase.
|
|
118
|
+
tos_bucket_name (str | None, optional): The bucket name of TOS. Defaults to None.
|
|
119
|
+
tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
|
|
120
|
+
metadata (dict | None, optional): The metadata of the files. Defaults to None.
|
|
121
|
+
**kwargs: Additional keyword arguments.
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
bool: True if successful, False otherwise.
|
|
119
125
|
"""
|
|
120
|
-
tos_bucket_name
|
|
126
|
+
tos_bucket_name = tos_bucket_name or self.tos_config.bucket
|
|
121
127
|
files = get_files_in_directory(directory=directory)
|
|
122
128
|
for _file in files:
|
|
123
129
|
content, file_name = _read_file_to_bytes(_file)
|
|
@@ -125,40 +131,64 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
125
131
|
content,
|
|
126
132
|
tos_bucket_name=tos_bucket_name,
|
|
127
133
|
object_key=f"{tos_bucket_path}/{file_name}",
|
|
134
|
+
metadata=metadata,
|
|
128
135
|
)
|
|
129
136
|
self._add_doc(tos_url=tos_url)
|
|
130
137
|
return True
|
|
131
138
|
|
|
132
139
|
@override
|
|
133
|
-
def add_from_files(
|
|
134
|
-
|
|
140
|
+
def add_from_files(
    self,
    files: list[str],
    tos_bucket_name: str | None = None,
    tos_bucket_path: str = "knowledgebase",
    metadata: dict | None = None,
    **kwargs,
) -> bool:
    """Add knowledge from a list of files to the knowledgebase.

    Each file is read, uploaded to TOS under
    ``<tos_bucket_path>/<file name>`` and then registered through
    ``_add_doc``.

    Args:
        files (list[str]): The files to add to knowledgebase.
        tos_bucket_name (str | None, optional): The bucket name of TOS.
            Falls back to ``self.tos_config.bucket`` when falsy.
        tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
        metadata (dict | None, optional): The metadata of the files. Defaults to None.
        **kwargs: Additional keyword arguments (not used here).

    Returns:
        bool: Always True once the loop has finished.
        NOTE(review): the ``_add_doc`` response is not inspected here, so
        this method never returns False on its own.
    """
    bucket = tos_bucket_name or self.tos_config.bucket
    for path in files:
        payload, base_name = _read_file_to_bytes(path)
        uploaded_url = self._upload_bytes_to_tos(
            payload,
            tos_bucket_name=bucket,
            object_key=f"{tos_bucket_path}/{base_name}",
            metadata=metadata,
        )
        self._add_doc(tos_url=uploaded_url)
    return True
|
|
151
171
|
|
|
152
172
|
@override
|
|
153
|
-
def add_from_text(
|
|
154
|
-
|
|
173
|
+
def add_from_text(
|
|
174
|
+
self,
|
|
175
|
+
text: str | list[str],
|
|
176
|
+
tos_bucket_name: str | None = None,
|
|
177
|
+
tos_bucket_path: str = "knowledgebase",
|
|
178
|
+
metadata: dict | None = None,
|
|
179
|
+
**kwargs,
|
|
180
|
+
) -> bool:
|
|
181
|
+
"""Add knowledge from text to the knowledgebase.
|
|
182
|
+
|
|
155
183
|
Args:
|
|
156
|
-
text
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
184
|
+
text (str | list[str]): The text to add to knowledgebase.
|
|
185
|
+
tos_bucket_name (str | None, optional): The bucket name of TOS. Defaults to None.
|
|
186
|
+
tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
bool: True if successful, False otherwise.
|
|
160
190
|
"""
|
|
161
|
-
tos_bucket_name
|
|
191
|
+
tos_bucket_name = tos_bucket_name or self.tos_config.bucket
|
|
162
192
|
if isinstance(text, list):
|
|
163
193
|
object_keys = kwargs.get(
|
|
164
194
|
"tos_object_keys",
|
|
@@ -170,7 +200,7 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
170
200
|
for _text, _object_key in zip(text, object_keys):
|
|
171
201
|
_content = _text.encode("utf-8")
|
|
172
202
|
tos_url = self._upload_bytes_to_tos(
|
|
173
|
-
_content, tos_bucket_name, _object_key
|
|
203
|
+
_content, tos_bucket_name, _object_key, metadata=metadata
|
|
174
204
|
)
|
|
175
205
|
self._add_doc(tos_url=tos_url)
|
|
176
206
|
return True
|
|
@@ -179,26 +209,42 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
179
209
|
object_key = kwargs.get(
|
|
180
210
|
"object_key", f"veadk/knowledgebase/{formatted_timestamp()}.txt"
|
|
181
211
|
)
|
|
182
|
-
tos_url = self._upload_bytes_to_tos(
|
|
212
|
+
tos_url = self._upload_bytes_to_tos(
|
|
213
|
+
content, tos_bucket_name, object_key, metadata=metadata
|
|
214
|
+
)
|
|
183
215
|
self._add_doc(tos_url=tos_url)
|
|
184
216
|
else:
|
|
185
217
|
raise ValueError("text must be str or list[str]")
|
|
186
218
|
return True
|
|
187
219
|
|
|
188
|
-
def add_from_bytes(
|
|
189
|
-
|
|
220
|
+
def add_from_bytes(
|
|
221
|
+
self,
|
|
222
|
+
content: bytes,
|
|
223
|
+
file_name: str,
|
|
224
|
+
tos_bucket_name: str | None = None,
|
|
225
|
+
tos_bucket_path: str = "knowledgebase",
|
|
226
|
+
metadata: dict | None = None,
|
|
227
|
+
**kwargs,
|
|
228
|
+
) -> bool:
|
|
229
|
+
"""Add knowledge from bytes to the knowledgebase.
|
|
230
|
+
|
|
190
231
|
Args:
|
|
191
|
-
content
|
|
192
|
-
file_name
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
232
|
+
content (bytes): The content to add to knowledgebase.
|
|
233
|
+
file_name (str): The file name of the content.
|
|
234
|
+
tos_bucket_name (str | None, optional): The bucket name of TOS. Defaults to None.
|
|
235
|
+
tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
|
|
236
|
+
metadata (dict | None, optional): The metadata of the files. Defaults to None.
|
|
237
|
+
**kwargs: Additional keyword arguments.
|
|
238
|
+
|
|
239
|
+
Returns:
|
|
240
|
+
bool: True if successful, False otherwise.
|
|
196
241
|
"""
|
|
197
|
-
tos_bucket_name
|
|
242
|
+
tos_bucket_name = tos_bucket_name or self.tos_config.bucket
|
|
198
243
|
tos_url = self._upload_bytes_to_tos(
|
|
199
244
|
content,
|
|
200
245
|
tos_bucket_name=tos_bucket_name,
|
|
201
246
|
object_key=f"{tos_bucket_path}/{file_name}",
|
|
247
|
+
metadata=metadata,
|
|
202
248
|
)
|
|
203
249
|
response = self._add_doc(tos_url=tos_url)
|
|
204
250
|
if response["code"] == 0:
|
|
@@ -206,8 +252,16 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
206
252
|
return False
|
|
207
253
|
|
|
208
254
|
@override
|
|
209
|
-
def search(
|
|
210
|
-
|
|
255
|
+
def search(
|
|
256
|
+
self,
|
|
257
|
+
query: str,
|
|
258
|
+
top_k: int = 5,
|
|
259
|
+
metadata: dict | None = None,
|
|
260
|
+
rerank: bool = True,
|
|
261
|
+
) -> list:
|
|
262
|
+
return self._search_knowledge(
|
|
263
|
+
query=query, top_k=top_k, metadata=metadata, rerank=rerank
|
|
264
|
+
)
|
|
211
265
|
|
|
212
266
|
def delete_collection(self) -> bool:
|
|
213
267
|
DELETE_COLLECTION_PATH = "/api/knowledge/collection/delete"
|
|
@@ -330,7 +384,7 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
330
384
|
response = self._do_request(
|
|
331
385
|
body={
|
|
332
386
|
"name": self.index,
|
|
333
|
-
"project":
|
|
387
|
+
"project": self.volcengine_project,
|
|
334
388
|
"description": "Created by Volcengine Agent Development Kit (VeADK).",
|
|
335
389
|
},
|
|
336
390
|
path=CREATE_COLLECTION_PATH,
|
|
@@ -343,10 +397,27 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
343
397
|
)
|
|
344
398
|
|
|
345
399
|
def _upload_bytes_to_tos(
|
|
346
|
-
self,
|
|
400
|
+
self,
|
|
401
|
+
content: bytes,
|
|
402
|
+
tos_bucket_name: str,
|
|
403
|
+
object_key: str,
|
|
404
|
+
metadata: dict | None = None,
|
|
347
405
|
) -> str:
|
|
406
|
+
# Here, we set the metadata via the TOS object, ref: https://www.volcengine.com/docs/84313/1254624
|
|
348
407
|
self._tos_client.bucket_name = tos_bucket_name
|
|
349
|
-
|
|
408
|
+
coro = self._tos_client.upload(
|
|
409
|
+
object_key=object_key,
|
|
410
|
+
bucket_name=tos_bucket_name,
|
|
411
|
+
data=content,
|
|
412
|
+
metadata=metadata,
|
|
413
|
+
)
|
|
414
|
+
try:
|
|
415
|
+
loop = asyncio.get_running_loop()
|
|
416
|
+
loop.run_until_complete(
|
|
417
|
+
coro
|
|
418
|
+
) if not loop.is_running() else asyncio.ensure_future(coro)
|
|
419
|
+
except RuntimeError:
|
|
420
|
+
asyncio.run(coro)
|
|
350
421
|
return f"{self._tos_client.bucket_name}/{object_key}"
|
|
351
422
|
|
|
352
423
|
def _add_doc(self, tos_url: str) -> Any:
|
|
@@ -355,7 +426,7 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
355
426
|
response = self._do_request(
|
|
356
427
|
body={
|
|
357
428
|
"collection_name": self.index,
|
|
358
|
-
"project":
|
|
429
|
+
"project": self.volcengine_project,
|
|
359
430
|
"add_type": "tos",
|
|
360
431
|
"tos_path": tos_url,
|
|
361
432
|
},
|
|
@@ -364,14 +435,43 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
364
435
|
)
|
|
365
436
|
return response
|
|
366
437
|
|
|
367
|
-
def _search_knowledge(
|
|
438
|
+
def _search_knowledge(
|
|
439
|
+
self,
|
|
440
|
+
query: str,
|
|
441
|
+
top_k: int = 5,
|
|
442
|
+
metadata: dict | None = None,
|
|
443
|
+
rerank: bool = True,
|
|
444
|
+
chunk_diffusion_count: int | None = 3,
|
|
445
|
+
) -> list[KnowledgebaseEntry]:
|
|
368
446
|
SEARCH_KNOWLEDGE_PATH = "/api/knowledge/collection/search_knowledge"
|
|
369
447
|
|
|
448
|
+
query_param = (
|
|
449
|
+
{
|
|
450
|
+
"doc_filter": {
|
|
451
|
+
"op": "and",
|
|
452
|
+
"conds": [
|
|
453
|
+
{"op": "must", "field": str(k), "conds": [str(v)]}
|
|
454
|
+
for k, v in metadata.items()
|
|
455
|
+
],
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
if metadata
|
|
459
|
+
else None
|
|
460
|
+
)
|
|
461
|
+
|
|
462
|
+
post_precessing = {
|
|
463
|
+
"rerank_swich": rerank,
|
|
464
|
+
"chunk_diffusion_count": chunk_diffusion_count,
|
|
465
|
+
}
|
|
466
|
+
|
|
370
467
|
response = self._do_request(
|
|
371
468
|
body={
|
|
372
469
|
"name": self.index,
|
|
470
|
+
"project": self.volcengine_project,
|
|
373
471
|
"query": query,
|
|
374
472
|
"limit": top_k,
|
|
473
|
+
"query_param": query_param,
|
|
474
|
+
"post_processing": post_precessing,
|
|
375
475
|
},
|
|
376
476
|
path=SEARCH_KNOWLEDGE_PATH,
|
|
377
477
|
method="POST",
|
|
@@ -382,11 +482,19 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
382
482
|
f"Error during knowledge search: {response.get('code')}, message: {response.get('message')}"
|
|
383
483
|
)
|
|
384
484
|
|
|
385
|
-
|
|
485
|
+
entries = []
|
|
486
|
+
for result in response.get("data", {}).get("result_list", []):
|
|
487
|
+
doc_meta_raw_str = result.get("doc_info", {}).get("doc_meta")
|
|
488
|
+
doc_meta_list = json.loads(doc_meta_raw_str) if doc_meta_raw_str else []
|
|
489
|
+
metadata = {}
|
|
490
|
+
for meta in doc_meta_list:
|
|
491
|
+
metadata[meta["field_name"]] = meta["field_value"]
|
|
386
492
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
493
|
+
entries.append(
|
|
494
|
+
KnowledgebaseEntry(content=result.get("content", ""), metadata=metadata)
|
|
495
|
+
)
|
|
496
|
+
|
|
497
|
+
return entries
|
|
390
498
|
|
|
391
499
|
def _do_request(
|
|
392
500
|
self,
|
|
@@ -409,4 +517,8 @@ class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
|
|
|
409
517
|
headers=request.headers,
|
|
410
518
|
data=request.body,
|
|
411
519
|
)
|
|
520
|
+
if not response.ok:
|
|
521
|
+
logger.error(
|
|
522
|
+
f"VikingDBKnowledgeBackend error during request: {response.json()}"
|
|
523
|
+
)
|
|
412
524
|
return response.json()
|