veadk-python 0.2.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. veadk/__init__.py +37 -0
  2. veadk/a2a/__init__.py +13 -0
  3. veadk/a2a/agent_card.py +45 -0
  4. veadk/a2a/remote_ve_agent.py +390 -0
  5. veadk/a2a/utils/__init__.py +13 -0
  6. veadk/a2a/utils/agent_to_a2a.py +170 -0
  7. veadk/a2a/ve_a2a_server.py +93 -0
  8. veadk/a2a/ve_agent_executor.py +78 -0
  9. veadk/a2a/ve_middlewares.py +313 -0
  10. veadk/a2a/ve_task_store.py +37 -0
  11. veadk/agent.py +402 -0
  12. veadk/agent_builder.py +93 -0
  13. veadk/agents/loop_agent.py +68 -0
  14. veadk/agents/parallel_agent.py +72 -0
  15. veadk/agents/sequential_agent.py +64 -0
  16. veadk/auth/__init__.py +13 -0
  17. veadk/auth/base_auth.py +22 -0
  18. veadk/auth/ve_credential_service.py +203 -0
  19. veadk/auth/veauth/__init__.py +13 -0
  20. veadk/auth/veauth/apmplus_veauth.py +58 -0
  21. veadk/auth/veauth/ark_veauth.py +75 -0
  22. veadk/auth/veauth/base_veauth.py +50 -0
  23. veadk/auth/veauth/cozeloop_veauth.py +13 -0
  24. veadk/auth/veauth/opensearch_veauth.py +75 -0
  25. veadk/auth/veauth/postgresql_veauth.py +75 -0
  26. veadk/auth/veauth/prompt_pilot_veauth.py +60 -0
  27. veadk/auth/veauth/speech_veauth.py +54 -0
  28. veadk/auth/veauth/utils.py +69 -0
  29. veadk/auth/veauth/vesearch_veauth.py +62 -0
  30. veadk/auth/veauth/viking_mem0_veauth.py +91 -0
  31. veadk/cli/__init__.py +13 -0
  32. veadk/cli/cli.py +58 -0
  33. veadk/cli/cli_clean.py +87 -0
  34. veadk/cli/cli_create.py +163 -0
  35. veadk/cli/cli_deploy.py +233 -0
  36. veadk/cli/cli_eval.py +215 -0
  37. veadk/cli/cli_init.py +214 -0
  38. veadk/cli/cli_kb.py +110 -0
  39. veadk/cli/cli_pipeline.py +285 -0
  40. veadk/cli/cli_prompt.py +86 -0
  41. veadk/cli/cli_update.py +106 -0
  42. veadk/cli/cli_uploadevalset.py +139 -0
  43. veadk/cli/cli_web.py +143 -0
  44. veadk/cloud/__init__.py +13 -0
  45. veadk/cloud/cloud_agent_engine.py +485 -0
  46. veadk/cloud/cloud_app.py +475 -0
  47. veadk/config.py +115 -0
  48. veadk/configs/__init__.py +13 -0
  49. veadk/configs/auth_configs.py +133 -0
  50. veadk/configs/database_configs.py +132 -0
  51. veadk/configs/model_configs.py +78 -0
  52. veadk/configs/tool_configs.py +54 -0
  53. veadk/configs/tracing_configs.py +110 -0
  54. veadk/consts.py +74 -0
  55. veadk/evaluation/__init__.py +17 -0
  56. veadk/evaluation/adk_evaluator/__init__.py +17 -0
  57. veadk/evaluation/adk_evaluator/adk_evaluator.py +302 -0
  58. veadk/evaluation/base_evaluator.py +642 -0
  59. veadk/evaluation/deepeval_evaluator/__init__.py +17 -0
  60. veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +339 -0
  61. veadk/evaluation/eval_set_file_loader.py +48 -0
  62. veadk/evaluation/eval_set_recorder.py +146 -0
  63. veadk/evaluation/types.py +65 -0
  64. veadk/evaluation/utils/prometheus.py +196 -0
  65. veadk/integrations/__init__.py +13 -0
  66. veadk/integrations/ve_apig/__init__.py +13 -0
  67. veadk/integrations/ve_apig/ve_apig.py +349 -0
  68. veadk/integrations/ve_apig/ve_apig_utils.py +332 -0
  69. veadk/integrations/ve_code_pipeline/__init__.py +13 -0
  70. veadk/integrations/ve_code_pipeline/ve_code_pipeline.py +431 -0
  71. veadk/integrations/ve_cozeloop/__init__.py +13 -0
  72. veadk/integrations/ve_cozeloop/ve_cozeloop.py +96 -0
  73. veadk/integrations/ve_cr/__init__.py +13 -0
  74. veadk/integrations/ve_cr/ve_cr.py +220 -0
  75. veadk/integrations/ve_faas/__init__.py +13 -0
  76. veadk/integrations/ve_faas/template/cookiecutter.json +15 -0
  77. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/__init__.py +13 -0
  78. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/clean.py +23 -0
  79. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/config.yaml.example +6 -0
  80. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/deploy.py +106 -0
  81. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/__init__.py +13 -0
  82. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/agent.py +25 -0
  83. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/app.py +202 -0
  84. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/requirements.txt +3 -0
  85. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/run.sh +49 -0
  86. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{ cookiecutter.app_name }}/__init__.py +14 -0
  87. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{ cookiecutter.app_name }}/agent.py +27 -0
  88. veadk/integrations/ve_faas/ve_faas.py +754 -0
  89. veadk/integrations/ve_faas/ve_faas_utils.py +408 -0
  90. veadk/integrations/ve_faas/web_template/cookiecutter.json +20 -0
  91. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/__init__.py +13 -0
  92. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/clean.py +23 -0
  93. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/config.yaml.example +2 -0
  94. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/deploy.py +44 -0
  95. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/Dockerfile +23 -0
  96. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/app.py +123 -0
  97. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/init_db.py +46 -0
  98. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/models.py +36 -0
  99. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/requirements.txt +4 -0
  100. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/run.sh +21 -0
  101. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/static/css/style.css +368 -0
  102. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/static/js/admin.js +0 -0
  103. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/admin/dashboard.html +21 -0
  104. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/admin/edit_post.html +24 -0
  105. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/admin/login.html +21 -0
  106. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/admin/posts.html +53 -0
  107. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/base.html +45 -0
  108. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/index.html +29 -0
  109. veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/post.html +14 -0
  110. veadk/integrations/ve_identity/__init__.py +110 -0
  111. veadk/integrations/ve_identity/auth_config.py +261 -0
  112. veadk/integrations/ve_identity/auth_mixins.py +650 -0
  113. veadk/integrations/ve_identity/auth_processor.py +385 -0
  114. veadk/integrations/ve_identity/function_tool.py +158 -0
  115. veadk/integrations/ve_identity/identity_client.py +864 -0
  116. veadk/integrations/ve_identity/mcp_tool.py +181 -0
  117. veadk/integrations/ve_identity/mcp_toolset.py +431 -0
  118. veadk/integrations/ve_identity/models.py +228 -0
  119. veadk/integrations/ve_identity/token_manager.py +188 -0
  120. veadk/integrations/ve_identity/utils.py +151 -0
  121. veadk/integrations/ve_prompt_pilot/__init__.py +13 -0
  122. veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +85 -0
  123. veadk/integrations/ve_tls/__init__.py +13 -0
  124. veadk/integrations/ve_tls/utils.py +116 -0
  125. veadk/integrations/ve_tls/ve_tls.py +212 -0
  126. veadk/integrations/ve_tos/ve_tos.py +710 -0
  127. veadk/integrations/ve_viking_db_memory/__init__.py +13 -0
  128. veadk/integrations/ve_viking_db_memory/ve_viking_db_memory.py +308 -0
  129. veadk/knowledgebase/__init__.py +17 -0
  130. veadk/knowledgebase/backends/__init__.py +13 -0
  131. veadk/knowledgebase/backends/base_backend.py +72 -0
  132. veadk/knowledgebase/backends/in_memory_backend.py +91 -0
  133. veadk/knowledgebase/backends/opensearch_backend.py +162 -0
  134. veadk/knowledgebase/backends/redis_backend.py +172 -0
  135. veadk/knowledgebase/backends/utils.py +92 -0
  136. veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +608 -0
  137. veadk/knowledgebase/entry.py +25 -0
  138. veadk/knowledgebase/knowledgebase.py +307 -0
  139. veadk/memory/__init__.py +35 -0
  140. veadk/memory/long_term_memory.py +365 -0
  141. veadk/memory/long_term_memory_backends/__init__.py +13 -0
  142. veadk/memory/long_term_memory_backends/base_backend.py +35 -0
  143. veadk/memory/long_term_memory_backends/in_memory_backend.py +67 -0
  144. veadk/memory/long_term_memory_backends/mem0_backend.py +155 -0
  145. veadk/memory/long_term_memory_backends/opensearch_backend.py +124 -0
  146. veadk/memory/long_term_memory_backends/redis_backend.py +140 -0
  147. veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +189 -0
  148. veadk/memory/short_term_memory.py +252 -0
  149. veadk/memory/short_term_memory_backends/__init__.py +13 -0
  150. veadk/memory/short_term_memory_backends/base_backend.py +31 -0
  151. veadk/memory/short_term_memory_backends/mysql_backend.py +49 -0
  152. veadk/memory/short_term_memory_backends/postgresql_backend.py +49 -0
  153. veadk/memory/short_term_memory_backends/sqlite_backend.py +55 -0
  154. veadk/memory/short_term_memory_processor.py +100 -0
  155. veadk/processors/__init__.py +26 -0
  156. veadk/processors/base_run_processor.py +120 -0
  157. veadk/prompts/__init__.py +13 -0
  158. veadk/prompts/agent_default_prompt.py +30 -0
  159. veadk/prompts/prompt_evaluator.py +20 -0
  160. veadk/prompts/prompt_memory_processor.py +55 -0
  161. veadk/prompts/prompt_optimization.py +150 -0
  162. veadk/runner.py +732 -0
  163. veadk/tools/__init__.py +13 -0
  164. veadk/tools/builtin_tools/__init__.py +13 -0
  165. veadk/tools/builtin_tools/agent_authorization.py +94 -0
  166. veadk/tools/builtin_tools/generate_image.py +23 -0
  167. veadk/tools/builtin_tools/image_edit.py +300 -0
  168. veadk/tools/builtin_tools/image_generate.py +446 -0
  169. veadk/tools/builtin_tools/lark.py +67 -0
  170. veadk/tools/builtin_tools/las.py +24 -0
  171. veadk/tools/builtin_tools/link_reader.py +66 -0
  172. veadk/tools/builtin_tools/llm_shield.py +381 -0
  173. veadk/tools/builtin_tools/load_knowledgebase.py +97 -0
  174. veadk/tools/builtin_tools/mcp_router.py +29 -0
  175. veadk/tools/builtin_tools/run_code.py +113 -0
  176. veadk/tools/builtin_tools/tts.py +253 -0
  177. veadk/tools/builtin_tools/vesearch.py +49 -0
  178. veadk/tools/builtin_tools/video_generate.py +363 -0
  179. veadk/tools/builtin_tools/web_scraper.py +76 -0
  180. veadk/tools/builtin_tools/web_search.py +83 -0
  181. veadk/tools/demo_tools.py +58 -0
  182. veadk/tools/load_knowledgebase_tool.py +149 -0
  183. veadk/tools/sandbox/__init__.py +13 -0
  184. veadk/tools/sandbox/browser_sandbox.py +37 -0
  185. veadk/tools/sandbox/code_sandbox.py +40 -0
  186. veadk/tools/sandbox/computer_sandbox.py +34 -0
  187. veadk/tracing/__init__.py +13 -0
  188. veadk/tracing/base_tracer.py +58 -0
  189. veadk/tracing/telemetry/__init__.py +13 -0
  190. veadk/tracing/telemetry/attributes/attributes.py +29 -0
  191. veadk/tracing/telemetry/attributes/extractors/common_attributes_extractors.py +180 -0
  192. veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +858 -0
  193. veadk/tracing/telemetry/attributes/extractors/tool_attributes_extractors.py +152 -0
  194. veadk/tracing/telemetry/attributes/extractors/types.py +164 -0
  195. veadk/tracing/telemetry/exporters/__init__.py +13 -0
  196. veadk/tracing/telemetry/exporters/apmplus_exporter.py +558 -0
  197. veadk/tracing/telemetry/exporters/base_exporter.py +39 -0
  198. veadk/tracing/telemetry/exporters/cozeloop_exporter.py +129 -0
  199. veadk/tracing/telemetry/exporters/inmemory_exporter.py +248 -0
  200. veadk/tracing/telemetry/exporters/tls_exporter.py +139 -0
  201. veadk/tracing/telemetry/opentelemetry_tracer.py +320 -0
  202. veadk/tracing/telemetry/telemetry.py +411 -0
  203. veadk/types.py +47 -0
  204. veadk/utils/__init__.py +13 -0
  205. veadk/utils/audio_manager.py +95 -0
  206. veadk/utils/auth.py +294 -0
  207. veadk/utils/logger.py +59 -0
  208. veadk/utils/mcp_utils.py +44 -0
  209. veadk/utils/misc.py +184 -0
  210. veadk/utils/patches.py +101 -0
  211. veadk/utils/volcengine_sign.py +205 -0
  212. veadk/version.py +15 -0
  213. veadk_python-0.2.27.dist-info/METADATA +373 -0
  214. veadk_python-0.2.27.dist-info/RECORD +218 -0
  215. veadk_python-0.2.27.dist-info/WHEEL +5 -0
  216. veadk_python-0.2.27.dist-info/entry_points.txt +2 -0
  217. veadk_python-0.2.27.dist-info/licenses/LICENSE +201 -0
  218. veadk_python-0.2.27.dist-info/top_level.txt +1 -0
@@ -0,0 +1,608 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import asyncio
16
+ import json
17
+ import os
18
+ import re
19
+ from pathlib import Path
20
+ from typing import Any, Literal
21
+
22
+ import requests
23
+ from pydantic import Field
24
+ from typing_extensions import override
25
+ from veadk.utils.misc import getenv
26
+ import veadk.config # noqa E401
27
+ from veadk.auth.veauth.utils import get_credential_from_vefaas_iam
28
+ from veadk.configs.database_configs import NormalTOSConfig, TOSConfig
29
+ from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
30
+ from veadk.knowledgebase.backends.utils import build_vikingdb_knowledgebase_request
31
+ from veadk.knowledgebase.entry import KnowledgebaseEntry
32
+ from veadk.utils.logger import get_logger
33
+ from veadk.utils.misc import formatted_timestamp
34
+
35
+ try:
36
+ from veadk.integrations.ve_tos.ve_tos import VeTOS
37
+ except ImportError:
38
+ raise ImportError(
39
+ "Please install VeADK extensions\npip install veadk-python[extensions]"
40
+ )
41
+
42
+ logger = get_logger(__name__)
43
+
44
+
45
+ def _read_file_to_bytes(file_path: str) -> tuple[bytes, str]:
46
+ """Read file content to bytes, and file name"""
47
+ with open(file_path, "rb") as f:
48
+ file_content = f.read()
49
+ file_name = file_path.split("/")[-1]
50
+ return file_content, file_name
51
+
52
+
53
def get_files_in_directory(directory: str) -> list[str]:
    """Return the paths of the regular files directly inside ``directory``.

    Sub-directories are skipped and are not descended into.

    Args:
        directory: Path of the directory to list.

    Returns:
        The file paths as strings, sorted so that repeated calls yield the
        same order.

    Raises:
        ValueError: If ``directory`` is not an existing directory.
    """
    dir_path = Path(directory)
    if not dir_path.is_dir():
        raise ValueError(f"The directory does not exist: {directory}")
    # iterdir() yields entries in arbitrary, filesystem-dependent order; sort
    # so files are always processed in the same sequence.
    return sorted(str(entry) for entry in dir_path.iterdir() if entry.is_file())
59
+
60
+
61
class VikingDBKnowledgeBackend(BaseKnowledgebaseBackend):
    """Volcengine Viking DB knowledgebase backend.

    Volcengine Viking DB knowledgebase provides powerful knowledgebase storage and search.

    Attributes:
        volcengine_access_key (str | None):
            Access key for Volcengine. Loaded automatically from the
            `VOLCENGINE_ACCESS_KEY` environment variable if not provided.

        volcengine_secret_key (str | None):
            Secret key for Volcengine. Loaded automatically from the
            `VOLCENGINE_SECRET_KEY` environment variable if not provided.

        session_token (str):
            Optional session token for temporary credentials. Defaults to an empty string.

        volcengine_project (str):
            VikingDB knowledgebase project name in the Volcengine console platform.
            Defaults to `"default"`.

        region (str): Region of the VikingDB knowledgebase. Defaults to `"cn-beijing"`.

        tos_config (TOSConfig | NormalTOSConfig):
            TOS configuration used for uploading files to TOS (Volcengine's Object Storage).
            Defaults to a new instance of `TOSConfig`.

    Notes:
        Please make sure that you have created a bucket in your TOS.

    Examples:
        Init a knowledgebase based on VikingDB knowledgebase backend.

        ```python
        # NOTE(review): this example previously read backend="redis", which does
        # not select this backend; "viking" is presumably the right key --
        # confirm against the Knowledgebase class.
        knowledgebase = Knowledgebase(backend="viking")
        ```

        With more configurations:

        ```python
        ...
        ```
    """

    # Static credentials; each falls back to its environment variable.
    volcengine_access_key: str | None = Field(
        default_factory=lambda: os.getenv("VOLCENGINE_ACCESS_KEY")
    )
    volcengine_secret_key: str | None = Field(
        default_factory=lambda: os.getenv("VOLCENGINE_SECRET_KEY")
    )
    session_token: str = ""

    # Project name sent as "project" in every knowledgebase API request body.
    volcengine_project: str = "default"

    region: str = "cn-beijing"

    # Supplies the TOS region and the default bucket used for uploads.
    tos_config: TOSConfig | NormalTOSConfig = Field(default_factory=TOSConfig)
118
+
119
def model_post_init(self, __context: Any) -> None:
    """Validate the index name and make sure the backing collection exists.

    The index name is checked first with ``precheck_index_naming``. The
    collection status is then queried and, when the collection is reported
    as missing, it is created with ``create_collection``.

    Args:
        __context: Post-init context; not used here.

    Raises:
        ValueError: Propagated from the index-name check, the status query
            or the collection creation.
    """
    self.precheck_index_naming()

    if not self.collection_status()["existed"]:
        # The previous message asked the user to "create it first" although
        # the collection is created automatically on the next line.
        logger.warning(
            f"VikingDB knowledgebase collection {self.index} does not exist, creating it..."
        )
        self.create_collection()
128
+
129
def precheck_index_naming(self):
    """Check ``self.index`` against the naming rule for index names.

    A valid name is a string of 1-128 characters that starts with an
    English letter and otherwise contains only letters, digits and
    underscores.

    Raises:
        ValueError: If ``self.index`` does not satisfy the rule.
    """
    name_is_valid = (
        isinstance(self.index, str)
        and 0 < len(self.index) <= 128
        and re.fullmatch(r"^[a-zA-Z][a-zA-Z0-9_]*$", self.index)
    )
    if name_is_valid:
        return

    raise ValueError(
        "The index name does not conform to the rules: "
        "it must start with an English letter, contain only letters, numbers, and underscores, and have a length of 1-128."
    )
139
+
140
def _get_tos_client(self, tos_bucket_name: str) -> VeTOS:
    """Build a ``VeTOS`` client for the given bucket.

    The configured access key, secret key and session token are used when
    both keys are set. Otherwise all three values are taken from
    ``get_credential_from_vefaas_iam()``.

    Args:
        tos_bucket_name: Bucket the client should target; when falsy,
            ``self.tos_config.bucket`` is used instead.

    Returns:
        A ``VeTOS`` client configured with ``self.tos_config.region``.
    """
    ak = self.volcengine_access_key
    sk = self.volcengine_secret_key
    token = self.session_token

    if not ak or not sk:
        # Static keys are incomplete: use the IAM-provided credential instead.
        iam_credential = get_credential_from_vefaas_iam()
        ak = iam_credential.access_key_id
        sk = iam_credential.secret_access_key
        token = iam_credential.session_token

    client = VeTOS(
        ak=ak,
        sk=sk,
        session_token=token,
        region=self.tos_config.region,
        bucket_name=tos_bucket_name or self.tos_config.bucket,
    )
    return client
158
+
159
+ @override
160
def add_from_directory(
    self,
    directory: str,
    tos_bucket_name: str | None = None,
    tos_bucket_path: str = "knowledgebase",
    metadata: dict | None = None,
    **kwargs,
) -> bool:
    """Add every file directly inside a directory to the knowledgebase.

    Each file is uploaded to TOS under ``{tos_bucket_path}/{file_name}`` and
    then registered in the collection. All files are attempted even if an
    earlier one is rejected.

    Args:
        directory (str): The directory to add to knowledgebase.
        tos_bucket_name (str | None, optional): The bucket name of TOS.
            Defaults to None, meaning ``self.tos_config.bucket``.
        tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
        metadata (dict | None, optional): The metadata of the files. Defaults to None.
        **kwargs: Additional keyword arguments (unused).

    Returns:
        bool: True if every document was accepted (response code 0),
        False if at least one was rejected.

    Raises:
        ValueError: If ``directory`` is not an existing directory.
    """
    tos_bucket_name = tos_bucket_name or self.tos_config.bucket
    all_added = True
    for file_path in get_files_in_directory(directory=directory):
        content, file_name = _read_file_to_bytes(file_path)
        tos_url = self._upload_bytes_to_tos(
            content,
            tos_bucket_name=tos_bucket_name,
            object_key=f"{tos_bucket_path}/{file_name}",
            metadata=metadata,
        )
        response = self._add_doc(tos_url=tos_url)
        # Previously the response was discarded and True was always returned,
        # so a rejected document looked like a success to the caller.
        if response.get("code") != 0:
            logger.error(
                f"Failed to add {tos_url} to collection {self.index}: {response}"
            )
            all_added = False
    return all_added
192
+
193
+ @override
194
def add_from_files(
    self,
    files: list[str],
    tos_bucket_name: str | None = None,
    tos_bucket_path: str = "knowledgebase",
    metadata: dict | None = None,
    **kwargs,
) -> bool:
    """Add knowledge from a list of files to the knowledgebase.

    Each file is uploaded to TOS under ``{tos_bucket_path}/{file_name}`` and
    then registered in the collection. All files are attempted even if an
    earlier one is rejected.

    Args:
        files (list[str]): The files to add to knowledgebase.
        tos_bucket_name (str | None, optional): The bucket name of TOS.
            Defaults to None, meaning ``self.tos_config.bucket``.
        tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
        metadata (dict | None, optional): The metadata of the files. Defaults to None.
        **kwargs: Additional keyword arguments (unused).

    Returns:
        bool: True if every document was accepted (response code 0),
        False if at least one was rejected.
    """
    tos_bucket_name = tos_bucket_name or self.tos_config.bucket
    all_added = True
    for file_path in files:
        content, file_name = _read_file_to_bytes(file_path)
        tos_url = self._upload_bytes_to_tos(
            content,
            tos_bucket_name=tos_bucket_name,
            object_key=f"{tos_bucket_path}/{file_name}",
            metadata=metadata,
        )
        response = self._add_doc(tos_url=tos_url)
        # Previously the response was discarded and True was always returned,
        # so a rejected document looked like a success to the caller.
        if response.get("code") != 0:
            logger.error(
                f"Failed to add {tos_url} to collection {self.index}: {response}"
            )
            all_added = False
    return all_added
225
+
226
+ @override
227
def add_from_text(
    self,
    text: str | list[str],
    tos_bucket_name: str | None = None,
    tos_bucket_path: str = "knowledgebase",
    metadata: dict | None = None,
    **kwargs,
) -> bool:
    """Add knowledge from text to the knowledgebase.

    Every text is UTF-8 encoded, uploaded to TOS as its own object and then
    registered in the collection.

    Args:
        text (str | list[str]): The text to add to knowledgebase.
        tos_bucket_name (str | None, optional): The bucket name of TOS.
            Defaults to None, meaning ``self.tos_config.bucket``.
        tos_bucket_path (str, optional): The path of TOS bucket, used to build
            the default object keys for a list of texts. Defaults to "knowledgebase".
        metadata (dict | None, optional): The metadata of the objects. Defaults to None.
        **kwargs: ``tos_object_keys`` (one key per item when ``text`` is a
            list) or ``object_key`` (when ``text`` is a single string)
            override the generated object keys.

    Returns:
        bool: True if every document was accepted (response code 0),
        False if at least one was rejected.

    Raises:
        ValueError: If ``text`` is neither ``str`` nor ``list``, or if
            ``tos_object_keys`` does not contain exactly one key per text.
    """
    tos_bucket_name = tos_bucket_name or self.tos_config.bucket

    if isinstance(text, list):
        texts = text
        if "tos_object_keys" in kwargs:
            object_keys = list(kwargs["tos_object_keys"])
        else:
            object_keys = [
                f"{tos_bucket_path}/{formatted_timestamp()}-{i}.txt"
                for i, _ in enumerate(texts)
            ]
        # zip() would silently drop the texts that have no matching key.
        if len(object_keys) != len(texts):
            raise ValueError(
                f"tos_object_keys must contain one key per text: "
                f"got {len(object_keys)} keys for {len(texts)} texts"
            )
    elif isinstance(text, str):
        texts = [text]
        if "object_key" in kwargs:
            object_keys = [kwargs["object_key"]]
        else:
            # NOTE(review): this default key ignores tos_bucket_path, unlike
            # the list branch; kept as-is so existing object locations do
            # not change -- confirm whether that is intended.
            object_keys = [f"veadk/knowledgebase/{formatted_timestamp()}.txt"]
    else:
        raise ValueError("text must be str or list[str]")

    all_added = True
    for item, object_key in zip(texts, object_keys):
        tos_url = self._upload_bytes_to_tos(
            item.encode("utf-8"), tos_bucket_name, object_key, metadata=metadata
        )
        response = self._add_doc(tos_url=tos_url)
        # Previously the response was discarded and True was always returned.
        if response.get("code") != 0:
            logger.error(
                f"Failed to add {tos_url} to collection {self.index}: {response}"
            )
            all_added = False
    return all_added
273
+
274
def add_from_bytes(
    self,
    content: bytes,
    file_name: str,
    tos_bucket_name: str | None = None,
    tos_bucket_path: str = "knowledgebase",
    metadata: dict | None = None,
    **kwargs,
) -> bool:
    """Upload raw bytes to TOS and register them as a knowledgebase document.

    Args:
        content (bytes): The content to add to knowledgebase.
        file_name (str): The file name of the content; the object is stored
            under ``{tos_bucket_path}/{file_name}``.
        tos_bucket_name (str | None, optional): The bucket name of TOS.
            Defaults to None, meaning ``self.tos_config.bucket``.
        tos_bucket_path (str, optional): The path of TOS bucket. Defaults to "knowledgebase".
        metadata (dict | None, optional): The metadata of the object. Defaults to None.
        **kwargs: Additional keyword arguments (unused).

    Returns:
        bool: True if the add-document response code is 0, False otherwise.
    """
    bucket = tos_bucket_name or self.tos_config.bucket
    target_key = f"{tos_bucket_path}/{file_name}"
    uploaded_url = self._upload_bytes_to_tos(
        content,
        tos_bucket_name=bucket,
        object_key=target_key,
        metadata=metadata,
    )
    add_result = self._add_doc(tos_url=uploaded_url)
    return True if add_result["code"] == 0 else False
307
+
308
+ @override
309
def search(
    self,
    query: str,
    top_k: int = 5,
    metadata: dict | None = None,
    rerank: bool = True,
) -> list:
    """Search the knowledgebase by delegating to ``_search_knowledge``.

    Args:
        query: The query text.
        top_k: Maximum number of results to request. Defaults to 5.
        metadata: Optional metadata used to filter documents. Defaults to None.
        rerank: Rerank flag forwarded to ``_search_knowledge``. Defaults to True.

    Returns:
        list: Whatever ``_search_knowledge`` returns for these arguments.
    """
    search_arguments = {
        "query": query,
        "top_k": top_k,
        "metadata": metadata,
        "rerank": rerank,
    }
    return self._search_knowledge(**search_arguments)
319
+
320
def delete_collection(self) -> bool:
    """Delete the collection named ``self.index`` in the configured project.

    Returns:
        bool: True if the response code is 0; otherwise the response is
        logged as an error and False is returned.
    """
    payload = {
        "name": self.index,
        "project": self.volcengine_project,
    }
    response = self._do_request(
        body=payload,
        path="/api/knowledge/collection/delete",
        method="POST",
    )

    if response.get("code") != 0:
        logger.error(f"Error during collection deletion: {response}")
        return False
    return True
336
+
337
def delete_doc_by_id(self, id: str) -> bool:
    """Delete one document from the collection by its document id.

    Args:
        id: The document id, sent as ``doc_id`` in the request body.

    Returns:
        bool: True if the response code is 0, False otherwise.
    """
    payload = {
        "collection_name": self.index,
        "project": self.volcengine_project,
        "doc_id": id,
    }
    response = self._do_request(
        body=payload,
        path="/api/knowledge/doc/delete",
        method="POST",
    )
    deleted = response.get("code") == 0
    return deleted
352
+
353
def list_docs(self, offset: int = 0, limit: int = -1):
    """List documents in collection.

    Args:
        offset (int): The offset of the first document to return.
        limit (int): The maximum number of documents to return. -1 means return all documents but max is 100.

    Returns:
        The ``doc_list`` from the response data, or an empty list when it
        is missing or empty.

    Raises:
        ValueError: If the response code is not 0.
    """
    request_body = {
        "collection_name": self.index,
        "project": self.volcengine_project,
        "offset": offset,
        "limit": limit,
    }
    response = self._do_request(
        body=request_body,
        path="/api/knowledge/doc/list",
        method="POST",
    )
    if response.get("code") != 0:
        raise ValueError(f"Error during list documents: {response.get('code')}")

    # A missing, null or empty doc_list is normalised to an empty list.
    return response["data"].get("doc_list", []) or []
376
+
377
def list_chunks(self, offset: int = 0, limit: int = -1):
    """List chunks in collection.

    Args:
        offset (int): The offset of the first chunk to return.
        limit (int): The maximum number of chunks to return. -1 means return all chunks but max is 100.

    Returns:
        A list of dicts with the keys ``id``, ``content`` and ``metadata``
        (taken from each point's ``point_id``, ``content`` and ``doc_info``),
        or an empty list when the response has no points.

    Raises:
        ValueError: If the response code is not 0.
    """
    request_body = {
        "collection_name": self.index,
        "project": self.volcengine_project,
        "offset": offset,
        "limit": limit,
    }
    response = self._do_request(
        body=request_body,
        path="/api/knowledge/point/list",
        method="POST",
    )

    if response.get("code") != 0:
        raise ValueError(f"Error during list chunks: {response}")

    if not response["data"].get("point_list", []):
        return []

    chunks = []
    for point in response["data"]["point_list"]:
        chunks.append(
            {
                "id": point["point_id"],
                "content": point["content"],
                "metadata": point["doc_info"],
            }
        )
    return chunks
410
+
411
def collection_status(self):
    """Report whether the collection exists and the status of its first index.

    Returns:
        dict: ``{"existed": True, "status": <status>}`` when the response
        code is 0, where ``<status>`` is the status of the first index of
        the first pipeline, or None if the response carries no such entry.
        ``{"existed": False, "status": None}`` when the response code is
        1000005.

    Raises:
        ValueError: For any other (or missing) response code.
    """
    response = self._do_request(
        body={
            "name": self.index,
            "project": self.volcengine_project,
        },
        path="/api/knowledge/collection/info",
        method="POST",
    )

    code = response.get("code")
    if code == 0:
        try:
            status = response["data"]["pipeline_list"][0]["index_list"][0]["status"]
        except (KeyError, IndexError, TypeError):
            # The collection exists but the response has no pipeline/index
            # entry to read a status from; report "exists, status unknown"
            # instead of crashing.
            status = None
        return {
            "existed": True,
            "status": status,
        }

    # 1000005 is the code this method treats as "collection does not exist".
    if code == 1000005:
        return {
            "existed": False,
            "status": None,
        }

    raise ValueError(f"Error during collection status: {response}")
434
+
435
def create_collection(self) -> None:
    """Create the collection named ``self.index`` in the configured project.

    The response is logged at debug level.

    Raises:
        ValueError: If the response code is not 0.
    """
    request_body = {
        "name": self.index,
        "project": self.volcengine_project,
        "description": "Created by Volcengine Agent Development Kit (VeADK).",
    }
    response = self._do_request(
        body=request_body,
        path="/api/knowledge/collection/create",
        method="POST",
    )
    logger.debug(
        f"Create collection {self.index} using project {self.volcengine_project} response: {response}"
    )

    if response.get("code") == 0:
        return
    raise ValueError(f"Error during collection creation: {response.get('code')}")
455
+
456
+ def _upload_bytes_to_tos(
457
+ self,
458
+ content: bytes,
459
+ tos_bucket_name: str,
460
+ object_key: str,
461
+ metadata: dict | None = None,
462
+ ) -> str:
463
+ # Here, we set the metadata via the TOS object, ref: https://www.volcengine.com/docs/84313/1254624
464
+ self._tos_client = self._get_tos_client(tos_bucket_name)
465
+
466
+ self._tos_client.bucket_name = tos_bucket_name
467
+ coro = self._tos_client.upload(
468
+ object_key=object_key,
469
+ bucket_name=tos_bucket_name,
470
+ data=content,
471
+ metadata=metadata,
472
+ )
473
+ try:
474
+ loop = asyncio.get_running_loop()
475
+ loop.run_until_complete(
476
+ coro
477
+ ) if not loop.is_running() else asyncio.ensure_future(coro)
478
+ except RuntimeError:
479
+ asyncio.run(coro)
480
+ return f"{self._tos_client.bucket_name}/{object_key}"
481
+
482
+ def _add_doc(self, tos_url: str) -> Any:
483
+ ADD_DOC_PATH = "/api/knowledge/doc/add"
484
+
485
+ response = self._do_request(
486
+ body={
487
+ "collection_name": self.index,
488
+ "project": self.volcengine_project,
489
+ "add_type": "tos",
490
+ "tos_path": tos_url,
491
+ },
492
+ path=ADD_DOC_PATH,
493
+ method="POST",
494
+ )
495
+ return response
496
+
497
+ def _search_knowledge(
498
+ self,
499
+ query: str,
500
+ top_k: int = 5,
501
+ metadata: dict | None = None,
502
+ rerank: bool = True,
503
+ chunk_diffusion_count: int | None = 3,
504
+ ) -> list[KnowledgebaseEntry]:
505
+ SEARCH_KNOWLEDGE_PATH = "/api/knowledge/collection/search_knowledge"
506
+
507
+ query_param = (
508
+ {
509
+ "doc_filter": {
510
+ "op": "and",
511
+ "conds": [
512
+ {"op": "must", "field": str(k), "conds": [str(v)]}
513
+ for k, v in metadata.items()
514
+ ],
515
+ }
516
+ }
517
+ if metadata
518
+ else None
519
+ )
520
+
521
+ post_precessing = {
522
+ "rerank_swich": rerank,
523
+ "chunk_diffusion_count": chunk_diffusion_count,
524
+ }
525
+
526
+ response = self._do_request(
527
+ body={
528
+ "name": self.index,
529
+ "project": self.volcengine_project,
530
+ "query": query,
531
+ "limit": top_k,
532
+ "query_param": query_param,
533
+ "post_processing": post_precessing,
534
+ },
535
+ path=SEARCH_KNOWLEDGE_PATH,
536
+ method="POST",
537
+ )
538
+
539
+ if response.get("code") != 0:
540
+ raise ValueError(
541
+ f"Error during knowledge search: {response.get('code')}, message: {response.get('message')}"
542
+ )
543
+
544
+ entries = []
545
+ for result in response.get("data", {}).get("result_list", []):
546
+ doc_meta_raw_str = result.get("doc_info", {}).get("doc_meta")
547
+ doc_meta_list = json.loads(doc_meta_raw_str) if doc_meta_raw_str else []
548
+ metadata = {}
549
+ for meta in doc_meta_list:
550
+ metadata[meta["field_name"]] = meta["field_value"]
551
+
552
+ entries.append(
553
+ KnowledgebaseEntry(content=result.get("content", ""), metadata=metadata)
554
+ )
555
+
556
+ return entries
557
+
558
def _do_request(
    self,
    body: dict,
    path: str,
    method: Literal["GET", "POST", "PUT", "DELETE"] = "POST",
) -> dict:
    """Send one signed request to the VikingDB knowledgebase API.

    The base URL defaults to the cn-beijing endpoint and can be overridden
    with the ``DATABASE_VIKING_BASE_URL`` environment variable. The
    configured access key, secret key and session token are used when both
    keys are set; otherwise all three come from
    ``get_credential_from_vefaas_iam()``.

    Args:
        body: Request payload passed to the request builder.
        path: API path, appended to the base URL.
        method: HTTP method. Defaults to "POST".

    Returns:
        dict: The decoded JSON response body. Non-2xx responses are logged
        but still returned.

    Raises:
        ValueError: If ``DATABASE_VIKING_BASE_URL`` is set but does not
            start with ``http://`` or ``https://``; the JSON decoding
            error (a ``ValueError`` subclass) is re-raised when the
            response body is not JSON.
    """
    VIKINGDB_KNOWLEDGEBASE_BASE_URL = (
        "https://api-knowledgebase.mlp.cn-beijing.volces.com"
    )
    # Bound the wait so an unresponsive endpoint cannot hang the caller forever.
    REQUEST_TIMEOUT_SECONDS = 60

    base_url = VIKINGDB_KNOWLEDGEBASE_BASE_URL
    env_host = getenv(
        "DATABASE_VIKING_BASE_URL", default_value=None, allow_false_values=True
    )
    if env_host:
        if not env_host.startswith(("http://", "https://")):
            raise ValueError(
                "DATABASE_VIKING_BASE_URL must start with http:// or https://"
            )
        # Drop a trailing slash so the joined URL does not contain "//".
        base_url = env_host.rstrip("/")
    full_path = f"{base_url}{path}"

    volcengine_access_key = self.volcengine_access_key
    volcengine_secret_key = self.volcengine_secret_key
    session_token = self.session_token

    if not (volcengine_access_key and volcengine_secret_key):
        cred = get_credential_from_vefaas_iam()
        volcengine_access_key = cred.access_key_id
        volcengine_secret_key = cred.secret_access_key
        session_token = cred.session_token

    request = build_vikingdb_knowledgebase_request(
        path=path,
        volcengine_access_key=volcengine_access_key,
        volcengine_secret_key=volcengine_secret_key,
        session_token=session_token,
        method=method,
        data=body,
    )
    response = requests.request(
        method=method,
        url=full_path,
        headers=request.headers,
        data=request.body,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )

    # Decode once. Previously a non-JSON error body raised from inside the
    # logging call, hiding the HTTP status that explains the failure.
    try:
        payload = response.json()
    except ValueError:
        logger.error(
            f"VikingDBKnowledgeBackend received a non-JSON response "
            f"(HTTP {response.status_code}): {response.text[:500]}"
        )
        raise

    if not response.ok:
        logger.error(f"VikingDBKnowledgeBackend error during request: {payload}")
    return payload
@@ -0,0 +1,25 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from pydantic import BaseModel
16
+
17
+
18
class KnowledgebaseEntry(BaseModel):
    """Represents a single entry in the knowledgebase.

    Attributes:
        content: The main content of the knowledgebase entry.
        metadata: Optional metadata associated with the entry.
    """

    content: str
    metadata: dict | None = None