veadk-python 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of veadk-python might be problematic. Click here for more details.
- veadk/agent.py +11 -18
- veadk/agent_builder.py +94 -0
- veadk/{database/__init__.py → auth/base_auth.py} +7 -2
- veadk/auth/veauth/apmplus_veauth.py +65 -0
- veadk/auth/veauth/ark_veauth.py +77 -0
- veadk/auth/veauth/base_veauth.py +50 -0
- veadk/auth/veauth/opensearch_veauth.py +75 -0
- veadk/auth/veauth/postgresql_veauth.py +75 -0
- veadk/auth/veauth/prompt_pilot_veauth.py +60 -0
- veadk/auth/veauth/vesearch_veauth.py +62 -0
- veadk/cli/cli.py +4 -0
- veadk/cli/cli_deploy.py +3 -2
- veadk/cli/cli_eval.py +160 -0
- veadk/cli/cli_init.py +1 -1
- veadk/cli/cli_pipeline.py +220 -0
- veadk/cli/cli_prompt.py +4 -4
- veadk/cli/cli_web.py +3 -1
- veadk/config.py +45 -81
- veadk/configs/database_configs.py +117 -0
- veadk/configs/model_configs.py +74 -0
- veadk/configs/tool_configs.py +42 -0
- veadk/configs/tracing_configs.py +110 -0
- veadk/consts.py +13 -1
- veadk/evaluation/base_evaluator.py +60 -44
- veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +18 -12
- veadk/evaluation/eval_set_recorder.py +2 -2
- veadk/integrations/ve_code_pipeline/__init__.py +13 -0
- veadk/integrations/ve_code_pipeline/ve_code_pipeline.py +431 -0
- veadk/integrations/ve_cozeloop/__init__.py +13 -0
- veadk/integrations/ve_cozeloop/ve_cozeloop.py +96 -0
- veadk/integrations/ve_cr/ve_cr.py +20 -5
- veadk/integrations/ve_faas/template/cookiecutter.json +1 -1
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/deploy.py +2 -2
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/agent.py +1 -1
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/run.sh +1 -5
- veadk/integrations/ve_faas/ve_faas.py +351 -36
- veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +6 -3
- veadk/integrations/ve_tls/__init__.py +13 -0
- veadk/integrations/ve_tls/utils.py +117 -0
- veadk/integrations/ve_tls/ve_tls.py +208 -0
- veadk/integrations/ve_tos/ve_tos.py +71 -75
- veadk/knowledgebase/backends/__init__.py +13 -0
- veadk/knowledgebase/backends/base_backend.py +59 -0
- veadk/knowledgebase/backends/in_memory_backend.py +82 -0
- veadk/knowledgebase/backends/opensearch_backend.py +136 -0
- veadk/knowledgebase/backends/redis_backend.py +144 -0
- veadk/knowledgebase/backends/utils.py +91 -0
- veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +412 -0
- veadk/knowledgebase/knowledgebase.py +109 -55
- veadk/memory/__init__.py +22 -0
- veadk/memory/long_term_memory.py +120 -51
- veadk/memory/long_term_memory_backends/__init__.py +13 -0
- veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
- veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
- veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
- veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
- veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
- veadk/memory/short_term_memory.py +80 -72
- veadk/memory/short_term_memory_backends/__init__.py +13 -0
- veadk/memory/short_term_memory_backends/base_backend.py +31 -0
- veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
- veadk/memory/short_term_memory_processor.py +9 -4
- veadk/runner.py +204 -247
- veadk/tools/builtin_tools/vesearch.py +2 -2
- veadk/tools/builtin_tools/video_generate.py +27 -20
- veadk/tools/builtin_tools/web_scraper.py +1 -1
- veadk/tools/builtin_tools/web_search.py +7 -7
- veadk/tools/load_knowledgebase_tool.py +1 -1
- veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +20 -2
- veadk/tracing/telemetry/exporters/apmplus_exporter.py +178 -14
- veadk/tracing/telemetry/exporters/cozeloop_exporter.py +6 -9
- veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
- veadk/tracing/telemetry/exporters/tls_exporter.py +6 -10
- veadk/tracing/telemetry/opentelemetry_tracer.py +5 -8
- veadk/tracing/telemetry/telemetry.py +66 -60
- veadk/utils/logger.py +1 -1
- veadk/utils/misc.py +63 -0
- veadk/utils/volcengine_sign.py +6 -2
- veadk/version.py +1 -1
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/METADATA +16 -3
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/RECORD +93 -64
- veadk/database/database_adapter.py +0 -368
- veadk/database/database_factory.py +0 -80
- veadk/database/kv/redis_database.py +0 -159
- veadk/database/local_database.py +0 -61
- veadk/database/relational/mysql_database.py +0 -173
- veadk/database/vector/opensearch_vector_database.py +0 -263
- veadk/database/vector/type.py +0 -50
- veadk/database/viking/viking_database.py +0 -471
- veadk/database/viking/viking_memory_db.py +0 -525
- /veadk/{database/kv → auth}/__init__.py +0 -0
- /veadk/{database/relational → auth/veauth}/__init__.py +0 -0
- /veadk/{database/vector/__init__.py → auth/veauth/cozeloop_veauth.py} +0 -0
- /veadk/{database/viking → configs}/__init__.py +0 -0
- /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/__init__.py +0 -0
- /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/agent.py +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/WHEEL +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/entry_points.txt +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import re
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any, Literal
|
|
19
|
+
|
|
20
|
+
import requests
|
|
21
|
+
from pydantic import Field
|
|
22
|
+
from typing_extensions import override
|
|
23
|
+
|
|
24
|
+
import veadk.config # noqa E401
|
|
25
|
+
from veadk.config import getenv
|
|
26
|
+
from veadk.configs.database_configs import NormalTOSConfig, TOSConfig
|
|
27
|
+
from veadk.consts import DEFAULT_TOS_BUCKET_NAME
|
|
28
|
+
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
|
|
29
|
+
from veadk.knowledgebase.backends.utils import build_vikingdb_knowledgebase_request
|
|
30
|
+
from veadk.utils.logger import get_logger
|
|
31
|
+
from veadk.utils.misc import formatted_timestamp
|
|
32
|
+
|
|
33
|
+
try:
|
|
34
|
+
from veadk.integrations.ve_tos.ve_tos import VeTOS
|
|
35
|
+
except ImportError:
|
|
36
|
+
raise ImportError(
|
|
37
|
+
"Please install VeADK extensions\npip install veadk-python[extensions]"
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
logger = get_logger(__name__)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _read_file_to_bytes(file_path: str) -> tuple[bytes, str]:
|
|
44
|
+
"""Read file content to bytes, and file name"""
|
|
45
|
+
with open(file_path, "rb") as f:
|
|
46
|
+
file_content = f.read()
|
|
47
|
+
file_name = file_path.split("/")[-1]
|
|
48
|
+
return file_content, file_name
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _extract_tos_attributes(**kwargs) -> tuple[str, str]:
    """Pick the TOS bucket name and bucket path out of ``kwargs``.

    Returns:
        ``(tos_bucket_name, tos_bucket_path)``. A missing ``tos_bucket_name``
        falls back to ``DEFAULT_TOS_BUCKET_NAME`` and a missing
        ``tos_bucket_path`` falls back to ``"knowledgebase"``.
    """
    if "tos_bucket_name" in kwargs:
        bucket_name = kwargs["tos_bucket_name"]
    else:
        bucket_name = DEFAULT_TOS_BUCKET_NAME

    if "tos_bucket_path" in kwargs:
        bucket_path = kwargs["tos_bucket_path"]
    else:
        bucket_path = "knowledgebase"

    return bucket_name, bucket_path
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def get_files_in_directory(directory: str):
    """Return the paths of the regular files directly inside ``directory``.

    Sub-directories are not descended into. Each returned entry is the string
    form of the path produced by ``Path.iterdir``.

    Raises:
        ValueError: If ``directory`` is not an existing directory.
    """
    root = Path(directory)
    if root.is_dir():
        found = []
        for entry in root.iterdir():
            if entry.is_file():
                found.append(str(entry))
        return found
    raise ValueError(f"The directory does not exist: {directory}")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
    """Knowledgebase backend that talks to the VikingDB knowledge HTTP API.

    Content is first uploaded to TOS through ``VeTOS`` and then registered in
    the collection named ``self.index`` by posting to the
    ``/api/knowledge/*`` endpoints.
    """

    volcengine_access_key: str = Field(
        default_factory=lambda: getenv("VOLCENGINE_ACCESS_KEY")
    )

    volcengine_secret_key: str = Field(
        default_factory=lambda: getenv("VOLCENGINE_SECRET_KEY")
    )

    volcengine_project: str = "default"
    """VikingDB knowledgebase project in Volcengine console platform. Default by `default`"""

    region: str = "cn-beijing"
    """VikingDB knowledgebase region"""

    tos_config: TOSConfig | NormalTOSConfig = Field(default_factory=TOSConfig)
    """TOS config, used to upload files to TOS"""

    def precheck_index_naming(self):
        """Validate ``self.index`` against the collection naming rules.

        Raises:
            ValueError: If the index is not a 1-128 character string that
                starts with an English letter and contains only letters,
                digits and underscores.
        """
        if not (
            isinstance(self.index, str)
            and 0 < len(self.index) <= 128
            and re.fullmatch(r"^[a-zA-Z][a-zA-Z0-9_]*$", self.index)
        ):
            raise ValueError(
                "The index name does not conform to the rules: "
                "it must start with an English letter, contain only letters, numbers, and underscores, and have a length of 1-128."
            )

    def model_post_init(self, __context: Any) -> None:
        """Validate the index name, probe the collection and build the TOS client."""
        self.precheck_index_naming()

        # A missing collection is only reported here; it is not created.
        if not self.collection_status()["existed"]:
            logger.warning(
                f"VikingDB knowledgebase collection {self.index} does not exist, please create it first..."
            )

        self._tos_client = VeTOS(
            ak=self.volcengine_access_key,
            sk=self.volcengine_secret_key,
            region=self.tos_config.region,
            bucket_name=self.tos_config.bucket,
        )

    @override
    def add_from_directory(self, directory: str, **kwargs) -> bool:
        """Upload every regular file directly inside ``directory`` and add it.

        Args:
            directory: str, the directory to add to knowledgebase
            **kwargs:
                - tos_bucket_name: str, the bucket name of TOS
                - tos_bucket_path: str, the path of TOS bucket

        Returns:
            True when every file was accepted by the add-doc endpoint.

        Raises:
            ValueError: If ``directory`` is not an existing directory.
        """
        files = get_files_in_directory(directory=directory)
        return self.add_from_files(files=files, **kwargs)

    @override
    def add_from_files(self, files: list[str], **kwargs) -> bool:
        """Upload each file in ``files`` to TOS and add it to the knowledgebase.

        Args:
            files: list[str], the files to add to knowledgebase
            **kwargs:
                - tos_bucket_name: str, the bucket name of TOS
                - tos_bucket_path: str, the path of TOS bucket

        Returns:
            True when every file was accepted by the add-doc endpoint. All
            files are attempted even if an earlier one is rejected.
        """
        tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)
        succeeded = True
        for _file in files:
            content, file_name = _read_file_to_bytes(_file)
            added = self._upload_and_add_doc(
                content,
                tos_bucket_name=tos_bucket_name,
                object_key=f"{tos_bucket_path}/{file_name}",
            )
            succeeded = succeeded and added
        return succeeded

    @override
    def add_from_text(self, text: str | list[str], **kwargs) -> bool:
        """Upload text as UTF-8 ``.txt`` object(s) and add them to the knowledgebase.

        Args:
            text: str or list[str], the text to add to knowledgebase
            **kwargs:
                - tos_bucket_name: str, the bucket name of TOS
                - tos_bucket_path: str, the path of TOS bucket
                - tos_object_keys: list[str], one object key per entry when
                  ``text`` is a list
                - object_key: str, the object key when ``text`` is a single
                  string

        Returns:
            True when every text was accepted by the add-doc endpoint.

        Raises:
            ValueError: If ``text`` is neither a str nor a list, or if
                ``tos_object_keys`` does not match ``text`` in length.
        """
        tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)

        if isinstance(text, list):
            object_keys = kwargs.get("tos_object_keys")
            if object_keys is None:
                object_keys = [
                    f"{tos_bucket_path}/{formatted_timestamp()}-{i}.txt"
                    for i, _ in enumerate(text)
                ]
            elif len(object_keys) != len(text):
                # zip() would silently drop the unmatched texts otherwise.
                raise ValueError(
                    "tos_object_keys must contain exactly one key per text entry"
                )

            succeeded = True
            for _text, _object_key in zip(text, object_keys):
                added = self._upload_and_add_doc(
                    _text.encode("utf-8"),
                    tos_bucket_name=tos_bucket_name,
                    object_key=_object_key,
                )
                succeeded = succeeded and added
            return succeeded

        if isinstance(text, str):
            if "tos_bucket_path" in kwargs:
                # Honour an explicitly requested bucket path.
                default_key = f"{tos_bucket_path}/{formatted_timestamp()}.txt"
            else:
                # Historical default location for single-string uploads.
                default_key = f"veadk/knowledgebase/{formatted_timestamp()}.txt"
            object_key = kwargs.get("object_key", default_key)
            return self._upload_and_add_doc(
                text.encode("utf-8"),
                tos_bucket_name=tos_bucket_name,
                object_key=object_key,
            )

        raise ValueError("text must be str or list[str]")

    def add_from_bytes(self, content: bytes, file_name: str, **kwargs) -> bool:
        """Upload raw bytes under ``file_name`` and add them to the knowledgebase.

        Args:
            content: bytes, the content to add to knowledgebase, bytes
            file_name: str, the file name of the content
            **kwargs:
                - tos_bucket_name: str, the bucket name of TOS
                - tos_bucket_path: str, the path of TOS bucket

        Returns:
            True when the add-doc endpoint answered with code 0.
        """
        tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)
        return self._upload_and_add_doc(
            content,
            tos_bucket_name=tos_bucket_name,
            object_key=f"{tos_bucket_path}/{file_name}",
        )

    @override
    def search(self, query: str, top_k: int = 5) -> list:
        """Return the content of up to ``top_k`` search results for ``query``."""
        return self._search_knowledge(query=query, top_k=top_k)

    def delete_collection(self) -> bool:
        """Delete the collection named ``self.index``; return True on code 0."""
        DELETE_COLLECTION_PATH = "/api/knowledge/collection/delete"

        response = self._do_request(
            body={
                "name": self.index,
                "project": self.volcengine_project,
            },
            path=DELETE_COLLECTION_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            logger.error(f"Error during collection deletion: {response}")
            return False
        return True

    def delete_doc_by_id(self, id: str) -> bool:
        """Delete the document ``id`` from the collection; return True on code 0."""
        DELETE_DOC_PATH = "/api/knowledge/doc/delete"
        response = self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "doc_id": id,
            },
            path=DELETE_DOC_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            logger.error(f"Error during document deletion: {response}")
            return False
        return True

    def list_docs(self, offset: int = 0, limit: int = -1):
        """List documents in collection.

        Args:
            offset (int): The offset of the first document to return.
            limit (int): The maximum number of documents to return. -1 means return all documents but max is 100.

        Returns:
            The ``doc_list`` from the response, or an empty list when absent.

        Raises:
            ValueError: If the response code is not 0.
        """
        LIST_DOCS_PATH = "/api/knowledge/doc/list"
        response = self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "offset": offset,
                "limit": limit,
            },
            path=LIST_DOCS_PATH,
            method="POST",
        )
        if response.get("code") != 0:
            raise ValueError(f"Error during list documents: {response.get('code')}")
        return response["data"].get("doc_list") or []

    def list_chunks(self, offset: int = 0, limit: int = -1):
        """List chunks in collection.

        Args:
            offset (int): The offset of the first chunk to return.
            limit (int): The maximum number of chunks to return. -1 means return all chunks but max is 100.

        Returns:
            A list of ``{"id", "content", "metadata"}`` dicts built from the
            response's ``point_list``; empty when there are no points.

        Raises:
            ValueError: If the response code is not 0.
        """
        LIST_CHUNKS_PATH = "/api/knowledge/point/list"
        response = self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "offset": offset,
                "limit": limit,
            },
            path=LIST_CHUNKS_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            raise ValueError(f"Error during list chunks: {response}")

        points = response["data"].get("point_list") or []
        return [
            {
                "id": res["point_id"],
                "content": res["content"],
                "metadata": res["doc_info"],
            }
            for res in points
        ]

    def collection_status(self):
        """Report whether the collection exists and, if so, its index status.

        Returns:
            ``{"existed": bool, "status": ...}``. ``status`` is None when the
            collection is missing or when the response carries no
            pipeline/index entry to read it from.

        Raises:
            ValueError: If the response code is neither 0 nor 1000005.
        """
        COLLECTION_INFO_PATH = "/api/knowledge/collection/info"
        response = self._do_request(
            body={
                "name": self.index,
                "project": self.volcengine_project,
            },
            path=COLLECTION_INFO_PATH,
            method="POST",
        )
        code = response.get("code")
        if code == 0:
            try:
                status = response["data"]["pipeline_list"][0]["index_list"][0][
                    "status"
                ]
            except (KeyError, IndexError, TypeError):
                # The collection exists but exposes no pipeline/index entry;
                # existence is still reported so construction does not crash.
                status = None
            return {
                "existed": True,
                "status": status,
            }
        if code == 1000005:
            # This code is handled as "collection does not exist".
            return {
                "existed": False,
                "status": None,
            }
        raise ValueError(f"Error during collection status: {response}")

    def create_collection(self) -> None:
        """Create the collection ``self.index`` in ``self.volcengine_project``.

        Raises:
            ValueError: If the response code is not 0.
        """
        CREATE_COLLECTION_PATH = "/api/knowledge/collection/create"

        response = self._do_request(
            body={
                "name": self.index,
                # Use the configured project so the collection is created where
                # every other call of this backend looks for it.
                "project": self.volcengine_project,
                "description": "Created by Volcengine Agent Development Kit (VeADK).",
            },
            path=CREATE_COLLECTION_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            raise ValueError(
                f"Error during collection creation: {response.get('code')}"
            )

    def _upload_and_add_doc(
        self, content: bytes, tos_bucket_name: str, object_key: str
    ) -> bool:
        """Upload ``content`` to TOS, register it, and report whether code was 0."""
        tos_url = self._upload_bytes_to_tos(
            content, tos_bucket_name=tos_bucket_name, object_key=object_key
        )
        response = self._add_doc(tos_url=tos_url)
        if response.get("code") != 0:
            logger.error(f"Failed to add document {tos_url}: {response}")
            return False
        return True

    def _upload_bytes_to_tos(
        self, content: bytes, tos_bucket_name: str, object_key: str
    ) -> str:
        """Upload ``content`` to ``tos_bucket_name`` and return ``"<bucket>/<key>"``.

        The TOS client's ``bucket_name`` is overwritten with
        ``tos_bucket_name`` before uploading.
        """
        self._tos_client.bucket_name = tos_bucket_name
        # NOTE(review): asyncio.run() cannot be called while another event loop
        # is running in the same thread - confirm callers are synchronous.
        asyncio.run(self._tos_client.upload(object_key=object_key, data=content))
        return f"{self._tos_client.bucket_name}/{object_key}"

    def _add_doc(self, tos_url: str) -> Any:
        """Register the TOS object at ``tos_url`` with the collection.

        Returns:
            The decoded JSON response of the add-doc endpoint.
        """
        ADD_DOC_PATH = "/api/knowledge/doc/add"

        response = self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "add_type": "tos",
                "tos_path": tos_url,
            },
            path=ADD_DOC_PATH,
            method="POST",
        )
        return response

    def _search_knowledge(self, query: str, top_k: int = 5) -> list[str]:
        """Search the collection and return the ``content`` of each result.

        Raises:
            ValueError: If the response code is not 0.
        """
        SEARCH_KNOWLEDGE_PATH = "/api/knowledge/collection/search_knowledge"

        response = self._do_request(
            body={
                "name": self.index,
                "project": self.volcengine_project,
                "query": query,
                "limit": top_k,
            },
            path=SEARCH_KNOWLEDGE_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            raise ValueError(
                f"Error during knowledge search: {response.get('code')}, message: {response.get('message')}"
            )

        search_result_list = response.get("data", {}).get("result_list", [])

        return [
            search_result.get("content", "") for search_result in search_result_list
        ]

    def _do_request(
        self,
        body: dict,
        path: str,
        method: Literal["GET", "POST", "PUT", "DELETE"] = "POST",
    ) -> dict:
        """Sign and send one request to the knowledgebase API; return its JSON.

        Args:
            body: Request payload handed to the request builder.
            path: Endpoint path appended to the base URL.
            method: HTTP method to use.
        """
        # NOTE(review): the host is fixed to cn-beijing and ignores
        # ``self.region`` - confirm against the request-signing helper before
        # making it configurable.
        VIKINGDB_KNOWLEDGEBASE_BASE_URL = "api-knowledgebase.mlp.cn-beijing.volces.com"

        request = build_vikingdb_knowledgebase_request(
            path=path,
            volcengine_access_key=self.volcengine_access_key,
            volcengine_secret_key=self.volcengine_secret_key,
            method=method,
            data=body,
        )
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # indefinitely - consider adding one.
        response = requests.request(
            method=method,
            url=f"https://{VIKINGDB_KNOWLEDGEBASE_BASE_URL}{path}",
            headers=request.headers,
            data=request.body,
        )
        return response.json()
|
|
@@ -12,79 +12,133 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import
|
|
15
|
+
from typing import Any, Callable, Literal
|
|
16
16
|
|
|
17
|
-
from
|
|
18
|
-
from
|
|
17
|
+
from pydantic import BaseModel, Field
|
|
18
|
+
from typing_extensions import Union
|
|
19
|
+
|
|
20
|
+
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
|
|
19
21
|
from veadk.utils.logger import get_logger
|
|
20
22
|
|
|
21
23
|
logger = get_logger(__name__)
|
|
22
24
|
|
|
23
25
|
|
|
26
|
+
def _get_backend_cls(backend: str) -> type[BaseKnowledgebaseBackend]:
    """Resolve a backend name to its knowledgebase backend class.

    Each backend module is imported only when its name is requested.

    Raises:
        ValueError: If ``backend`` is not one of ``local``, ``opensearch``,
            ``viking`` or ``redis``.
    """
    if backend == "local":
        from veadk.knowledgebase.backends.in_memory_backend import (
            InMemoryKnowledgeBackend,
        )

        return InMemoryKnowledgeBackend

    if backend == "opensearch":
        from veadk.knowledgebase.backends.opensearch_backend import (
            OpensearchKnowledgeBackend,
        )

        return OpensearchKnowledgeBackend

    if backend == "viking":
        from veadk.knowledgebase.backends.vikingdb_knowledge_backend import (
            VikingDBKnowledgeBackend,
        )

        return VikingDBKnowledgeBackend

    if backend == "redis":
        from veadk.knowledgebase.backends.redis_backend import (
            RedisKnowledgeBackend,
        )

        return RedisKnowledgeBackend

    raise ValueError(f"Unsupported knowledgebase backend: {backend}")
|
|
54
|
+
|
|
55
|
+
|
|
24
56
|
def build_knowledgebase_index(app_name: str):
    """Derive the knowledgebase index name for ``app_name`` (``veadk_kb_`` prefix)."""
    index_name = "veadk_kb_{}".format(app_name)
    return index_name
|
|
26
58
|
|
|
27
59
|
|
|
28
|
-
class KnowledgeBase:
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
60
|
+
class KnowledgeBase(BaseModel):
    """Front object that forwards knowledge operations to a backend.

    The backend is either built from a backend name plus ``backend_config``
    or supplied directly as a ``BaseKnowledgebaseBackend`` instance.
    """

    backend: Union[
        Literal["local", "opensearch", "viking", "redis"], BaseKnowledgebaseBackend
    ] = "local"
    """Knowledgebase backend type. Supported backends are:
    - local: In-memory knowledgebase, data will be lost when the program exits.
    - opensearch: OpenSearch knowledgebase, requires an OpenSearch cluster.
    - viking: Volcengine VikingDB knowledgebase, requires VikingDB service.
    - redis: Redis knowledgebase, requires Redis with vector search capability.
    Default is `local`."""

    backend_config: dict = Field(default_factory=dict)
    """Configuration for the backend"""

    top_k: int = 10
    """Number of top similar documents to retrieve during search.

    Default is 10."""

    app_name: str = ""

    index: str = ""
    """The name of the knowledgebase index. If not provided, it will be generated based on the `app_name`."""

    def model_post_init(self, __context: Any) -> None:
        """Resolve the index name and create (or adopt) the backend instance.

        Raises:
            ValueError: If a backend name is used and neither ``app_name`` nor
                ``index`` is provided, or if the backend name is unsupported.
        """
        # A ready-made backend instance is used as is; index and app_name are
        # not consulted in that case.
        if isinstance(self.backend, BaseKnowledgebaseBackend):
            self._backend = self.backend
            logger.info(
                f"Initialized knowledgebase with provided backend instance {self._backend.__class__.__name__}"
            )
            return

        # must provide at least one of them
        if not self.app_name and not self.index:
            raise ValueError(
                "Either `app_name` or `index` must be provided one of them."
            )

        # priority use index
        if self.app_name and self.index:
            logger.warning(
                "`app_name` and `index` are both provided, using `index` as the knowledgebase index name."
            )

        # generate index name if `index` not provided but `app_name` is provided
        if self.app_name and not self.index:
            self.index = build_knowledgebase_index(self.app_name)
            logger.info(
                f"Knowledgebase index is set to {self.index} (generated by the app_name: {self.app_name})."
            )

        logger.info(
            f"Initializing knowledgebase: backend={self.backend} top_k={self.top_k}"
        )
        self._backend = _get_backend_cls(self.backend)(
            index=self.index, **(self.backend_config or {})
        )
        logger.info(
            f"Initialized knowledgebase with backend {self._backend.__class__.__name__}"
        )

    def add_from_directory(self, directory: str, **kwargs) -> bool:
        """Add knowledge from file path to knowledgebase"""
        return self._backend.add_from_directory(directory=directory, **kwargs)

    def add_from_files(self, files: list[str], **kwargs) -> bool:
        """Add knowledge (e.g, documents, strings, ...) to knowledgebase"""
        return self._backend.add_from_files(files=files, **kwargs)

    def add_from_text(self, text: str | list[str], **kwargs) -> bool:
        """Add knowledge from text to knowledgebase"""
        return self._backend.add_from_text(text=text, **kwargs)

    def search(self, query: str, top_k: int = 0, **kwargs) -> list[str]:
        """Search knowledge from knowledgebase.

        A ``top_k`` of 0 means "use the instance's configured ``top_k``".
        """
        if top_k == 0:
            top_k = self.top_k
        return self._backend.search(query=query, top_k=top_k, **kwargs)

    def __getattr__(self, name) -> Callable:
        """In case of knowledgebase have no backends' methods (`delete`, `list_chunks`, etc)

        For example, knowledgebase.delete(...) -> self._backend.delete(...)

        Raises:
            AttributeError: If no backend has been assigned yet, or the
                backend does not provide ``name`` either.
        """
        # Look `_backend` up without the `__getattr__` fallback: a plain
        # `self._backend` would call this method again while the backend is
        # still unset and end in a RecursionError.
        try:
            backend = object.__getattribute__(self, "_backend")
        except AttributeError:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            ) from None
        return getattr(backend, name)
|
veadk/memory/__init__.py
CHANGED
|
@@ -11,3 +11,25 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from veadk.memory.long_term_memory import LongTermMemory
|
|
19
|
+
from veadk.memory.short_term_memory import ShortTermMemory
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Lazy loading for classes
|
|
23
|
+
def __getattr__(name):
    """Import the memory classes lazily on first attribute access.

    Args:
        name: The attribute requested on the ``veadk.memory`` module.

    Returns:
        ``ShortTermMemory`` or ``LongTermMemory``, imported on demand.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    if name == "ShortTermMemory":
        from veadk.memory.short_term_memory import ShortTermMemory

        return ShortTermMemory
    # "LongTeremMemory" is the misspelling this hook used to match; it is kept
    # as an alias so code written against it keeps working.
    if name in ("LongTermMemory", "LongTeremMemory"):
        from veadk.memory.long_term_memory import LongTermMemory

        return LongTermMemory
    raise AttributeError(f"module 'veadk.memory' has no attribute '{name}'")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
__all__ = ["ShortTermMemory", "LongTermMemory"]
|