veadk-python 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of veadk-python might be problematic. Click here for more details.

Files changed (102) hide show
  1. veadk/agent.py +11 -18
  2. veadk/agent_builder.py +94 -0
  3. veadk/{database/__init__.py → auth/base_auth.py} +7 -2
  4. veadk/auth/veauth/apmplus_veauth.py +65 -0
  5. veadk/auth/veauth/ark_veauth.py +77 -0
  6. veadk/auth/veauth/base_veauth.py +50 -0
  7. veadk/auth/veauth/opensearch_veauth.py +75 -0
  8. veadk/auth/veauth/postgresql_veauth.py +75 -0
  9. veadk/auth/veauth/prompt_pilot_veauth.py +60 -0
  10. veadk/auth/veauth/vesearch_veauth.py +62 -0
  11. veadk/cli/cli.py +4 -0
  12. veadk/cli/cli_deploy.py +3 -2
  13. veadk/cli/cli_eval.py +160 -0
  14. veadk/cli/cli_init.py +1 -1
  15. veadk/cli/cli_pipeline.py +220 -0
  16. veadk/cli/cli_prompt.py +4 -4
  17. veadk/cli/cli_web.py +3 -1
  18. veadk/config.py +45 -81
  19. veadk/configs/database_configs.py +117 -0
  20. veadk/configs/model_configs.py +74 -0
  21. veadk/configs/tool_configs.py +42 -0
  22. veadk/configs/tracing_configs.py +110 -0
  23. veadk/consts.py +13 -1
  24. veadk/evaluation/base_evaluator.py +60 -44
  25. veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +18 -12
  26. veadk/evaluation/eval_set_recorder.py +2 -2
  27. veadk/integrations/ve_code_pipeline/__init__.py +13 -0
  28. veadk/integrations/ve_code_pipeline/ve_code_pipeline.py +431 -0
  29. veadk/integrations/ve_cozeloop/__init__.py +13 -0
  30. veadk/integrations/ve_cozeloop/ve_cozeloop.py +96 -0
  31. veadk/integrations/ve_cr/ve_cr.py +20 -5
  32. veadk/integrations/ve_faas/template/cookiecutter.json +1 -1
  33. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/deploy.py +2 -2
  34. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/agent.py +1 -1
  35. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/run.sh +1 -5
  36. veadk/integrations/ve_faas/ve_faas.py +351 -36
  37. veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +6 -3
  38. veadk/integrations/ve_tls/__init__.py +13 -0
  39. veadk/integrations/ve_tls/utils.py +117 -0
  40. veadk/integrations/ve_tls/ve_tls.py +208 -0
  41. veadk/integrations/ve_tos/ve_tos.py +71 -75
  42. veadk/knowledgebase/backends/__init__.py +13 -0
  43. veadk/knowledgebase/backends/base_backend.py +59 -0
  44. veadk/knowledgebase/backends/in_memory_backend.py +82 -0
  45. veadk/knowledgebase/backends/opensearch_backend.py +136 -0
  46. veadk/knowledgebase/backends/redis_backend.py +144 -0
  47. veadk/knowledgebase/backends/utils.py +91 -0
  48. veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +412 -0
  49. veadk/knowledgebase/knowledgebase.py +109 -55
  50. veadk/memory/__init__.py +22 -0
  51. veadk/memory/long_term_memory.py +120 -51
  52. veadk/memory/long_term_memory_backends/__init__.py +13 -0
  53. veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
  54. veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
  55. veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
  56. veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
  57. veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
  58. veadk/memory/short_term_memory.py +80 -72
  59. veadk/memory/short_term_memory_backends/__init__.py +13 -0
  60. veadk/memory/short_term_memory_backends/base_backend.py +31 -0
  61. veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
  62. veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
  63. veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
  64. veadk/memory/short_term_memory_processor.py +9 -4
  65. veadk/runner.py +204 -247
  66. veadk/tools/builtin_tools/vesearch.py +2 -2
  67. veadk/tools/builtin_tools/video_generate.py +27 -20
  68. veadk/tools/builtin_tools/web_scraper.py +1 -1
  69. veadk/tools/builtin_tools/web_search.py +7 -7
  70. veadk/tools/load_knowledgebase_tool.py +1 -1
  71. veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +20 -2
  72. veadk/tracing/telemetry/exporters/apmplus_exporter.py +178 -14
  73. veadk/tracing/telemetry/exporters/cozeloop_exporter.py +6 -9
  74. veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
  75. veadk/tracing/telemetry/exporters/tls_exporter.py +6 -10
  76. veadk/tracing/telemetry/opentelemetry_tracer.py +5 -8
  77. veadk/tracing/telemetry/telemetry.py +66 -60
  78. veadk/utils/logger.py +1 -1
  79. veadk/utils/misc.py +63 -0
  80. veadk/utils/volcengine_sign.py +6 -2
  81. veadk/version.py +1 -1
  82. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/METADATA +16 -3
  83. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/RECORD +93 -64
  84. veadk/database/database_adapter.py +0 -368
  85. veadk/database/database_factory.py +0 -80
  86. veadk/database/kv/redis_database.py +0 -159
  87. veadk/database/local_database.py +0 -61
  88. veadk/database/relational/mysql_database.py +0 -173
  89. veadk/database/vector/opensearch_vector_database.py +0 -263
  90. veadk/database/vector/type.py +0 -50
  91. veadk/database/viking/viking_database.py +0 -471
  92. veadk/database/viking/viking_memory_db.py +0 -525
  93. /veadk/{database/kv → auth}/__init__.py +0 -0
  94. /veadk/{database/relational → auth/veauth}/__init__.py +0 -0
  95. /veadk/{database/vector/__init__.py → auth/veauth/cozeloop_veauth.py} +0 -0
  96. /veadk/{database/viking → configs}/__init__.py +0 -0
  97. /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/__init__.py +0 -0
  98. /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/agent.py +0 -0
  99. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/WHEEL +0 -0
  100. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/entry_points.txt +0 -0
  101. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/licenses/LICENSE +0 -0
  102. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,412 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import asyncio
16
+ import re
17
+ from pathlib import Path
18
+ from typing import Any, Literal
19
+
20
+ import requests
21
+ from pydantic import Field
22
+ from typing_extensions import override
23
+
24
+ import veadk.config # noqa E401
25
+ from veadk.config import getenv
26
+ from veadk.configs.database_configs import NormalTOSConfig, TOSConfig
27
+ from veadk.consts import DEFAULT_TOS_BUCKET_NAME
28
+ from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
29
+ from veadk.knowledgebase.backends.utils import build_vikingdb_knowledgebase_request
30
+ from veadk.utils.logger import get_logger
31
+ from veadk.utils.misc import formatted_timestamp
32
+
33
+ try:
34
+ from veadk.integrations.ve_tos.ve_tos import VeTOS
35
+ except ImportError:
36
+ raise ImportError(
37
+ "Please install VeADK extensions\npip install veadk-python[extensions]"
38
+ )
39
+
40
+ logger = get_logger(__name__)
41
+
42
+
43
def _read_file_to_bytes(file_path: str) -> tuple[bytes, str]:
    """Read a file and return its raw content together with its base name.

    Args:
        file_path: Path of the file to read.

    Returns:
        A ``(content, file_name)`` tuple, where ``file_name`` is the final
        path component of ``file_path``.
    """
    path = Path(file_path)
    # Path.name honours the platform's separators, whereas splitting on "/"
    # returned the whole path for Windows-style "C:\\dir\\file.txt" inputs.
    return path.read_bytes(), path.name
49
+
50
+
51
def _extract_tos_attributes(**kwargs) -> tuple[str, str]:
    """Return the ``(bucket name, bucket path)`` pair described by ``kwargs``.

    ``tos_bucket_name`` falls back to ``DEFAULT_TOS_BUCKET_NAME`` and
    ``tos_bucket_path`` falls back to ``"knowledgebase"`` when not supplied.
    """
    bucket_name = kwargs.get("tos_bucket_name", DEFAULT_TOS_BUCKET_NAME)
    bucket_path = kwargs.get("tos_bucket_path", "knowledgebase")
    return bucket_name, bucket_path
56
+
57
+
58
def get_files_in_directory(directory: str):
    """List the files located directly inside ``directory``.

    Only entries for which ``is_file()`` is true are returned (as strings);
    sub-directories are skipped and not descended into.

    Raises:
        ValueError: If ``directory`` is not an existing directory.
    """
    root = Path(directory)
    if root.is_dir():
        collected = []
        for entry in root.iterdir():
            if entry.is_file():
                collected.append(str(entry))
        return collected
    raise ValueError(f"The directory does not exist: {directory}")
64
+
65
+
66
class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
    """Knowledgebase backend that stores documents in a VikingDB collection.

    Content is uploaded to TOS first and then registered with the collection
    named by ``self.index`` through the ``/api/knowledge/*`` HTTP endpoints.
    """

    volcengine_access_key: str = Field(
        default_factory=lambda: getenv("VOLCENGINE_ACCESS_KEY")
    )

    volcengine_secret_key: str = Field(
        default_factory=lambda: getenv("VOLCENGINE_SECRET_KEY")
    )

    volcengine_project: str = "default"
    """VikingDB knowledgebase project in Volcengine console platform. Default by `default`"""

    region: str = "cn-beijing"
    """VikingDB knowledgebase region"""

    tos_config: TOSConfig | NormalTOSConfig = Field(default_factory=TOSConfig)
    """TOS config, used to upload files to TOS"""

    def precheck_index_naming(self):
        """Validate ``self.index`` against the collection naming rules.

        Raises:
            ValueError: If the index is not a 1-128 character string that
                starts with an English letter and contains only letters,
                digits and underscores.
        """
        if not (
            isinstance(self.index, str)
            and 0 < len(self.index) <= 128
            and re.fullmatch(r"^[a-zA-Z][a-zA-Z0-9_]*$", self.index)
        ):
            raise ValueError(
                "The index name does not conform to the rules: "
                "it must start with an English letter, contain only letters, numbers, and underscores, and have a length of 1-128."
            )

    def model_post_init(self, __context: Any) -> None:
        """Validate the index name, probe the collection and build the TOS client."""
        self.precheck_index_naming()

        # A missing collection is only reported here; it is NOT created
        # automatically (call `create_collection` for that).
        if not self.collection_status()["existed"]:
            logger.warning(
                f"VikingDB knowledgebase collection {self.index} does not exist, please create it first..."
            )

        self._tos_client = VeTOS(
            ak=self.volcengine_access_key,
            sk=self.volcengine_secret_key,
            region=self.tos_config.region,
            bucket_name=self.tos_config.bucket,
        )

    @override
    def add_from_directory(self, directory: str, **kwargs) -> bool:
        """Upload every file directly inside ``directory`` and add it to the collection.

        Args:
            directory: str, the directory to add to knowledgebase
            **kwargs:
                - tos_bucket_name: str, the bucket name of TOS
                - tos_bucket_path: str, the path of TOS bucket

        Returns:
            True only if every file was accepted by the service.
        """
        tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)
        files = get_files_in_directory(directory=directory)
        return self._upload_files(files, tos_bucket_name, tos_bucket_path)

    @override
    def add_from_files(self, files: list[str], **kwargs) -> bool:
        """Upload the given files and add them to the collection.

        Args:
            files: list[str], the files to add to knowledgebase
            **kwargs:
                - tos_bucket_name: str, the bucket name of TOS
                - tos_bucket_path: str, the path of TOS bucket

        Returns:
            True only if every file was accepted by the service.
        """
        tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)
        return self._upload_files(files, tos_bucket_name, tos_bucket_path)

    @override
    def add_from_text(self, text: str | list[str], **kwargs) -> bool:
        """Upload text as ``.txt`` objects and add them to the collection.

        Args:
            text: str or list[str], the text to add to knowledgebase
            **kwargs:
                - tos_bucket_name: str, the bucket name of TOS
                - tos_bucket_path: str, the path of TOS bucket
                - tos_object_keys: list[str], one object key per item when
                  ``text`` is a list
                - object_key: str, the object key when ``text`` is a str

        Returns:
            True only if every text item was accepted by the service.

        Raises:
            ValueError: If ``text`` is neither str nor list, or if
                ``tos_object_keys`` does not match the number of texts.
        """
        tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)

        if isinstance(text, list):
            object_keys = kwargs.get("tos_object_keys")
            if object_keys is None:
                object_keys = [
                    f"{tos_bucket_path}/{formatted_timestamp()}-{i}.txt"
                    for i, _ in enumerate(text)
                ]
            object_keys = list(object_keys)
            # zip() would silently drop the texts that have no matching key.
            if len(object_keys) != len(text):
                raise ValueError(
                    "`tos_object_keys` must contain exactly one key per text item"
                )

            all_added = True
            for _text, _object_key in zip(text, object_keys):
                if not self._upload_and_add(
                    _text.encode("utf-8"), tos_bucket_name, _object_key
                ):
                    all_added = False
            return all_added

        if isinstance(text, str):
            object_key = kwargs.get("object_key")
            if object_key is None:
                # Honour an explicitly requested bucket path; otherwise keep
                # the location single texts have always been written to.
                prefix = (
                    tos_bucket_path
                    if "tos_bucket_path" in kwargs
                    else "veadk/knowledgebase"
                )
                object_key = f"{prefix}/{formatted_timestamp()}.txt"
            return self._upload_and_add(
                text.encode("utf-8"), tos_bucket_name, object_key
            )

        raise ValueError("text must be str or list[str]")

    def add_from_bytes(self, content: bytes, file_name: str, **kwargs) -> bool:
        """Upload raw bytes under ``file_name`` and add them to the collection.

        Args:
            content: bytes, the content to add to knowledgebase, bytes
            file_name: str, the file name of the content
            **kwargs:
                - tos_bucket_name: str, the bucket name of TOS
                - tos_bucket_path: str, the path of TOS bucket

        Returns:
            True if the service accepted the document, False otherwise.
        """
        tos_bucket_name, tos_bucket_path = _extract_tos_attributes(**kwargs)
        return self._upload_and_add(
            content, tos_bucket_name, f"{tos_bucket_path}/{file_name}"
        )

    @override
    def search(self, query: str, top_k: int = 5) -> list:
        """Return the content of up to ``top_k`` results matching ``query``."""
        return self._search_knowledge(query=query, top_k=top_k)

    def delete_collection(self) -> bool:
        """Delete the collection; return False (and log) when the service refuses."""
        DELETE_COLLECTION_PATH = "/api/knowledge/collection/delete"

        response = self._do_request(
            body={
                "name": self.index,
                "project": self.volcengine_project,
            },
            path=DELETE_COLLECTION_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            logger.error(f"Error during collection deletion: {response}")
            return False
        return True

    def delete_doc_by_id(self, id: str) -> bool:
        """Delete one document by id; return False (and log) when the service refuses."""
        DELETE_DOC_PATH = "/api/knowledge/doc/delete"
        response = self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "doc_id": id,
            },
            path=DELETE_DOC_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            logger.error(f"Error during document deletion: {response}")
            return False
        return True

    def list_docs(self, offset: int = 0, limit: int = -1):
        """List documents in collection.

        Args:
            offset (int): The offset of the first document to return.
            limit (int): The maximum number of documents to return. -1 means return all documents but max is 100.

        Returns:
            The ``doc_list`` entries of the response, or an empty list.

        Raises:
            ValueError: If the service answers with a non-zero code.
        """
        LIST_DOCS_PATH = "/api/knowledge/doc/list"
        response = self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "offset": offset,
                "limit": limit,
            },
            path=LIST_DOCS_PATH,
            method="POST",
        )
        if response.get("code") != 0:
            raise ValueError(f"Error during list documents: {response.get('code')}")
        return response["data"].get("doc_list") or []

    def list_chunks(self, offset: int = 0, limit: int = -1):
        """List chunks in collection.

        Args:
            offset (int): The offset of the first chunk to return.
            limit (int): The maximum number of chunks to return. -1 means return all chunks but max is 100.

        Returns:
            A list of ``{"id", "content", "metadata"}`` dicts, one per chunk.

        Raises:
            ValueError: If the service answers with a non-zero code.
        """
        LIST_CHUNKS_PATH = "/api/knowledge/point/list"
        response = self._do_request(
            body={
                "collection_name": self.index,
                "project": self.volcengine_project,
                "offset": offset,
                "limit": limit,
            },
            path=LIST_CHUNKS_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            raise ValueError(f"Error during list chunks: {response}")

        points = response["data"].get("point_list") or []
        return [
            {
                "id": point["point_id"],
                "content": point["content"],
                "metadata": point["doc_info"],
            }
            for point in points
        ]

    def collection_status(self):
        """Report whether the collection exists and, if so, its index status.

        Returns:
            ``{"existed": bool, "status": ...}``; ``status`` is None when the
            collection is missing or reports no pipeline/index entry yet.

        Raises:
            ValueError: If the service answers with an unexpected code.
        """
        COLLECTION_INFO_PATH = "/api/knowledge/collection/info"
        response = self._do_request(
            body={
                "name": self.index,
                "project": self.volcengine_project,
            },
            path=COLLECTION_INFO_PATH,
            method="POST",
        )
        code = response.get("code")
        if code == 0:
            try:
                status = response["data"]["pipeline_list"][0]["index_list"][0][
                    "status"
                ]
            except (KeyError, IndexError, TypeError):
                # The collection exists but exposes no pipeline/index entry.
                status = None
            return {
                "existed": True,
                "status": status,
            }
        if code == 1000005:
            # This code is treated as "collection not found".
            return {
                "existed": False,
                "status": None,
            }
        raise ValueError(f"Error during collection status: {response}")

    def create_collection(self) -> None:
        """Create the collection in the configured project.

        Raises:
            ValueError: If the service answers with a non-zero code.
        """
        CREATE_COLLECTION_PATH = "/api/knowledge/collection/create"

        response = self._do_request(
            body={
                "name": self.index,
                # Use the configured project, like every other collection call.
                "project": self.volcengine_project,
                "description": "Created by Volcengine Agent Development Kit (VeADK).",
            },
            path=CREATE_COLLECTION_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            raise ValueError(
                f"Error during collection creation: {response.get('code')}"
            )

    def _upload_files(
        self, files: list[str], tos_bucket_name: str, tos_bucket_path: str
    ) -> bool:
        """Upload each file under ``tos_bucket_path`` and register it; True if all succeed."""
        all_added = True
        for _file in files:
            content, file_name = _read_file_to_bytes(_file)
            if not self._upload_and_add(
                content, tos_bucket_name, f"{tos_bucket_path}/{file_name}"
            ):
                all_added = False
        return all_added

    def _upload_and_add(
        self, content: bytes, tos_bucket_name: str, object_key: str
    ) -> bool:
        """Upload ``content`` to TOS and register it; log and return False on rejection."""
        tos_url = self._upload_bytes_to_tos(
            content,
            tos_bucket_name=tos_bucket_name,
            object_key=object_key,
        )
        response = self._add_doc(tos_url=tos_url)
        if response.get("code") != 0:
            logger.error(f"Error during adding document {tos_url}: {response}")
            return False
        return True

    def _upload_bytes_to_tos(
        self, content: bytes, tos_bucket_name: str, object_key: str
    ) -> str:
        """Upload ``content`` to TOS and return its ``<bucket>/<object_key>`` path.

        NOTE(review): ``asyncio.run`` cannot be called while an event loop is
        already running in the current thread, so this raises RuntimeError
        when invoked from async code.
        """
        self._tos_client.bucket_name = tos_bucket_name
        asyncio.run(self._tos_client.upload(object_key=object_key, data=content))
        return f"{self._tos_client.bucket_name}/{object_key}"

    def _add_doc(self, tos_url: str) -> Any:
        """Register the TOS object at ``tos_url`` as a document; return the raw response."""
        ADD_DOC_PATH = "/api/knowledge/doc/add"

        response = self._do_request(
            body={
                "collection_name": self.index,
                # Use the configured project, like every other collection call.
                "project": self.volcengine_project,
                "add_type": "tos",
                "tos_path": tos_url,
            },
            path=ADD_DOC_PATH,
            method="POST",
        )
        return response

    def _search_knowledge(self, query: str, top_k: int = 5) -> list[str]:
        """Query the collection and return the ``content`` of each result.

        Raises:
            ValueError: If the service answers with a non-zero code.
        """
        SEARCH_KNOWLEDGE_PATH = "/api/knowledge/collection/search_knowledge"

        response = self._do_request(
            body={
                "name": self.index,
                # Search the same project the other calls operate on.
                "project": self.volcengine_project,
                "query": query,
                "limit": top_k,
            },
            path=SEARCH_KNOWLEDGE_PATH,
            method="POST",
        )

        if response.get("code") != 0:
            raise ValueError(
                f"Error during knowledge search: {response.get('code')}, message: {response.get('message')}"
            )

        search_result_list = (response.get("data") or {}).get("result_list") or []

        return [
            search_result.get("content", "") for search_result in search_result_list
        ]

    def _do_request(
        self,
        body: dict,
        path: str,
        method: Literal["GET", "POST", "PUT", "DELETE"] = "POST",
    ) -> dict:
        """Sign and send one request to the knowledgebase API; return the decoded JSON.

        NOTE(review): the host is fixed to cn-beijing and ``self.region`` is
        not consulted here; whether the signing helper supports other regions
        is not visible from this module - confirm before parameterising.
        NOTE(review): no ``timeout`` is passed to ``requests``, so a stalled
        connection blocks indefinitely - confirm an acceptable limit.
        """
        VIKINGDB_KNOWLEDGEBASE_BASE_URL = "api-knowledgebase.mlp.cn-beijing.volces.com"

        request = build_vikingdb_knowledgebase_request(
            path=path,
            volcengine_access_key=self.volcengine_access_key,
            volcengine_secret_key=self.volcengine_secret_key,
            method=method,
            data=body,
        )
        response = requests.request(
            method=method,
            url=f"https://{VIKINGDB_KNOWLEDGEBASE_BASE_URL}{path}",
            headers=request.headers,
            data=request.body,
        )
        return response.json()
@@ -12,79 +12,133 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import BinaryIO, Literal, TextIO
15
+ from typing import Any, Callable, Literal
16
16
 
17
- from veadk.database.database_adapter import get_knowledgebase_database_adapter
18
- from veadk.database.database_factory import DatabaseFactory
17
+ from pydantic import BaseModel, Field
18
+ from typing_extensions import Union
19
+
20
+ from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
19
21
  from veadk.utils.logger import get_logger
20
22
 
21
23
  logger = get_logger(__name__)
22
24
 
23
25
 
26
def _get_backend_cls(backend: str) -> type[BaseKnowledgebaseBackend]:
    """Map a backend name to its backend class.

    Each backend module is imported only when its name is requested, so the
    dependencies of unused backends are never loaded.

    Raises:
        ValueError: If ``backend`` is not one of the supported names.
    """
    if backend == "local":
        from veadk.knowledgebase.backends.in_memory_backend import (
            InMemoryKnowledgeBackend as backend_cls,
        )
    elif backend == "opensearch":
        from veadk.knowledgebase.backends.opensearch_backend import (
            OpensearchKnowledgeBackend as backend_cls,
        )
    elif backend == "viking":
        from veadk.knowledgebase.backends.vikingdb_knowledge_backend import (
            VikingDBKnowledgeBackend as backend_cls,
        )
    elif backend == "redis":
        from veadk.knowledgebase.backends.redis_backend import (
            RedisKnowledgeBackend as backend_cls,
        )
    else:
        raise ValueError(f"Unsupported knowledgebase backend: {backend}")

    return backend_cls
54
+
55
+
24
56
def build_knowledgebase_index(app_name: str):
    """Derive the knowledgebase index name by prefixing ``app_name`` with ``veadk_kb_``."""
    return "veadk_kb_{}".format(app_name)
26
58
 
27
59
 
28
- class KnowledgeBase:
29
- def __init__(
30
- self,
31
- backend: Literal["local", "opensearch", "viking", "redis", "mysql"] = "local",
32
- top_k: int = 10,
33
- db_config=None,
34
- ):
35
- logger.info(f"Initializing knowledgebase: backend={backend} top_k={top_k}")
60
class KnowledgeBase(BaseModel):
    """Facade that routes knowledgebase operations to a configured backend.

    The backend is either given as an instance or built in
    ``model_post_init`` from a backend name plus ``backend_config``.
    """

    backend: Union[
        Literal["local", "opensearch", "viking", "redis"], BaseKnowledgebaseBackend
    ] = "local"
    """Knowledgebase backend type. Supported backends are:
    - local: In-memory knowledgebase, data will be lost when the program exits.
    - opensearch: OpenSearch knowledgebase, requires an OpenSearch cluster.
    - viking: Volcengine VikingDB knowledgebase, requires VikingDB service.
    - redis: Redis knowledgebase, requires Redis with vector search capability.
    Default is `local`."""

    backend_config: dict = Field(default_factory=dict)
    """Configuration for the backend"""

    top_k: int = 10
    """Number of top similar documents to retrieve during search.

    Default is 10."""

    app_name: str = ""

    index: str = ""
    """The name of the knowledgebase index. If not provided, it will be generated based on the `app_name`."""

    def model_post_init(self, __context: Any) -> None:
        """Resolve ``self._backend`` from the ``backend`` field.

        A backend instance is used as-is. A backend name is instantiated with
        ``index`` (generated from ``app_name`` when empty) and
        ``backend_config``.

        Raises:
            ValueError: If a backend name is given but neither ``app_name``
                nor ``index`` is set.
        """
        if isinstance(self.backend, BaseKnowledgebaseBackend):
            self._backend = self.backend
            logger.info(
                f"Initialized knowledgebase with provided backend instance {self._backend.__class__.__name__}"
            )
            return

        # must provide at least one of them
        if not self.app_name and not self.index:
            raise ValueError(
                "Either `app_name` or `index` must be provided one of them."
            )

        # an explicit index takes priority over app_name
        if self.app_name and self.index:
            logger.warning(
                "`app_name` and `index` are both provided, using `index` as the knowledgebase index name."
            )

        # generate index name if `index` not provided but `app_name` is provided
        if self.app_name and not self.index:
            self.index = build_knowledgebase_index(self.app_name)
            logger.info(
                f"Knowledgebase index is set to {self.index} (generated by the app_name: {self.app_name})."
            )

        logger.info(
            f"Initializing knowledgebase: backend={self.backend} top_k={self.top_k}"
        )
        self._backend = _get_backend_cls(self.backend)(
            index=self.index, **(self.backend_config or {})
        )
        logger.info(
            f"Initialized knowledgebase with backend {self._backend.__class__.__name__}"
        )

    def add_from_directory(self, directory: str, **kwargs) -> bool:
        """Add the files found in ``directory`` to the knowledgebase."""
        return self._backend.add_from_directory(directory=directory, **kwargs)

    def add_from_files(self, files: list[str], **kwargs) -> bool:
        """Add the given files to the knowledgebase."""
        return self._backend.add_from_files(files=files, **kwargs)

    def add_from_text(self, text: str | list[str], **kwargs) -> bool:
        """Add one text or a list of texts to the knowledgebase."""
        return self._backend.add_from_text(text=text, **kwargs)

    def search(self, query: str, top_k: int = 0, **kwargs) -> list[str]:
        """Search the knowledgebase; ``top_k=0`` means "use ``self.top_k``"."""
        if top_k == 0:
            top_k = self.top_k
        return self._backend.search(query=query, top_k=top_k, **kwargs)

    def __getattr__(self, name) -> Callable:
        """Forward attributes this class lacks to the backend.

        For example, knowledgebase.delete(...) -> self._backend.delete(...)

        Raises:
            AttributeError: If ``_backend`` itself is requested before it has
                been assigned.
        """
        # `__getattr__` only runs when normal lookup fails. Reading
        # `self._backend` before it exists would re-enter this method and
        # recurse until RecursionError, so fail fast instead.
        if name == "_backend":
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self._backend, name)
veadk/memory/__init__.py CHANGED
@@ -11,3 +11,25 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
15
+ from typing import TYPE_CHECKING
16
+
17
+ if TYPE_CHECKING:
18
+ from veadk.memory.long_term_memory import LongTermMemory
19
+ from veadk.memory.short_term_memory import ShortTermMemory
20
+
21
+
22
+ # Lazy loading for classes
23
def __getattr__(name):
    """Lazily resolve the memory classes exported by this package.

    The class modules are imported on first attribute access instead of at
    package import time.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported classes.
    """
    if name == "ShortTermMemory":
        from veadk.memory.short_term_memory import ShortTermMemory

        return ShortTermMemory
    # The check used to be misspelled "LongTeremMemory", so the exported
    # name `LongTermMemory` could never be resolved.
    if name == "LongTermMemory":
        from veadk.memory.long_term_memory import LongTermMemory

        return LongTermMemory
    raise AttributeError(f"module 'veadk.memory' has no attribute '{name}'")
33
+
34
+
35
+ __all__ = ["ShortTermMemory", "LongTermMemory"]