veadk-python 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of veadk-python might be problematic. Click here for more details.

Files changed (102)
  1. veadk/agent.py +11 -18
  2. veadk/agent_builder.py +94 -0
  3. veadk/{database/__init__.py → auth/base_auth.py} +7 -2
  4. veadk/auth/veauth/apmplus_veauth.py +65 -0
  5. veadk/auth/veauth/ark_veauth.py +77 -0
  6. veadk/auth/veauth/base_veauth.py +50 -0
  7. veadk/auth/veauth/opensearch_veauth.py +75 -0
  8. veadk/auth/veauth/postgresql_veauth.py +75 -0
  9. veadk/auth/veauth/prompt_pilot_veauth.py +60 -0
  10. veadk/auth/veauth/vesearch_veauth.py +62 -0
  11. veadk/cli/cli.py +4 -0
  12. veadk/cli/cli_deploy.py +3 -2
  13. veadk/cli/cli_eval.py +160 -0
  14. veadk/cli/cli_init.py +1 -1
  15. veadk/cli/cli_pipeline.py +220 -0
  16. veadk/cli/cli_prompt.py +4 -4
  17. veadk/cli/cli_web.py +3 -1
  18. veadk/config.py +45 -81
  19. veadk/configs/database_configs.py +117 -0
  20. veadk/configs/model_configs.py +74 -0
  21. veadk/configs/tool_configs.py +42 -0
  22. veadk/configs/tracing_configs.py +110 -0
  23. veadk/consts.py +13 -1
  24. veadk/evaluation/base_evaluator.py +60 -44
  25. veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +18 -12
  26. veadk/evaluation/eval_set_recorder.py +2 -2
  27. veadk/integrations/ve_code_pipeline/__init__.py +13 -0
  28. veadk/integrations/ve_code_pipeline/ve_code_pipeline.py +431 -0
  29. veadk/integrations/ve_cozeloop/__init__.py +13 -0
  30. veadk/integrations/ve_cozeloop/ve_cozeloop.py +96 -0
  31. veadk/integrations/ve_cr/ve_cr.py +20 -5
  32. veadk/integrations/ve_faas/template/cookiecutter.json +1 -1
  33. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/deploy.py +2 -2
  34. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/agent.py +1 -1
  35. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/run.sh +1 -5
  36. veadk/integrations/ve_faas/ve_faas.py +351 -36
  37. veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +6 -3
  38. veadk/integrations/ve_tls/__init__.py +13 -0
  39. veadk/integrations/ve_tls/utils.py +117 -0
  40. veadk/integrations/ve_tls/ve_tls.py +208 -0
  41. veadk/integrations/ve_tos/ve_tos.py +71 -75
  42. veadk/knowledgebase/backends/__init__.py +13 -0
  43. veadk/knowledgebase/backends/base_backend.py +59 -0
  44. veadk/knowledgebase/backends/in_memory_backend.py +82 -0
  45. veadk/knowledgebase/backends/opensearch_backend.py +136 -0
  46. veadk/knowledgebase/backends/redis_backend.py +144 -0
  47. veadk/knowledgebase/backends/utils.py +91 -0
  48. veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +412 -0
  49. veadk/knowledgebase/knowledgebase.py +109 -55
  50. veadk/memory/__init__.py +22 -0
  51. veadk/memory/long_term_memory.py +120 -51
  52. veadk/memory/long_term_memory_backends/__init__.py +13 -0
  53. veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
  54. veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
  55. veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
  56. veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
  57. veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
  58. veadk/memory/short_term_memory.py +80 -72
  59. veadk/memory/short_term_memory_backends/__init__.py +13 -0
  60. veadk/memory/short_term_memory_backends/base_backend.py +31 -0
  61. veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
  62. veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
  63. veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
  64. veadk/memory/short_term_memory_processor.py +9 -4
  65. veadk/runner.py +204 -247
  66. veadk/tools/builtin_tools/vesearch.py +2 -2
  67. veadk/tools/builtin_tools/video_generate.py +27 -20
  68. veadk/tools/builtin_tools/web_scraper.py +1 -1
  69. veadk/tools/builtin_tools/web_search.py +7 -7
  70. veadk/tools/load_knowledgebase_tool.py +1 -1
  71. veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +20 -2
  72. veadk/tracing/telemetry/exporters/apmplus_exporter.py +178 -14
  73. veadk/tracing/telemetry/exporters/cozeloop_exporter.py +6 -9
  74. veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
  75. veadk/tracing/telemetry/exporters/tls_exporter.py +6 -10
  76. veadk/tracing/telemetry/opentelemetry_tracer.py +5 -8
  77. veadk/tracing/telemetry/telemetry.py +66 -60
  78. veadk/utils/logger.py +1 -1
  79. veadk/utils/misc.py +63 -0
  80. veadk/utils/volcengine_sign.py +6 -2
  81. veadk/version.py +1 -1
  82. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/METADATA +16 -3
  83. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/RECORD +93 -64
  84. veadk/database/database_adapter.py +0 -368
  85. veadk/database/database_factory.py +0 -80
  86. veadk/database/kv/redis_database.py +0 -159
  87. veadk/database/local_database.py +0 -61
  88. veadk/database/relational/mysql_database.py +0 -173
  89. veadk/database/vector/opensearch_vector_database.py +0 -263
  90. veadk/database/vector/type.py +0 -50
  91. veadk/database/viking/viking_database.py +0 -471
  92. veadk/database/viking/viking_memory_db.py +0 -525
  93. /veadk/{database/kv → auth}/__init__.py +0 -0
  94. /veadk/{database/relational → auth/veauth}/__init__.py +0 -0
  95. /veadk/{database/vector/__init__.py → auth/veauth/cozeloop_veauth.py} +0 -0
  96. /veadk/{database/viking → configs}/__init__.py +0 -0
  97. /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/__init__.py +0 -0
  98. /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/agent.py +0 -0
  99. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/WHEEL +0 -0
  100. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/entry_points.txt +0 -0
  101. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/licenses/LICENSE +0 -0
  102. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,208 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+
17
+ from veadk.consts import DEFAULT_TLS_LOG_PROJECT_NAME, DEFAULT_TLS_TRACING_INSTANCE_NAME
18
+ from veadk.utils.logger import get_logger
19
+
20
+ logger = get_logger(__name__)
21
+
22
+
23
+ class VeTLS:
24
+ def __init__(
25
+ self,
26
+ access_key: str | None = None,
27
+ secret_key: str | None = None,
28
+ region: str = "cn-beijing",
29
+ ):
30
+ try:
31
+ from volcengine.tls.TLSService import TLSService
32
+
33
+ from veadk.integrations.ve_tls.utils import ve_tls_request
34
+ except ImportError:
35
+ raise ImportError(
36
+ "Please install volcengine SDK before init VeTLS: pip install volcengine"
37
+ )
38
+
39
+ self._ve_tls_request = ve_tls_request
40
+
41
+ self.access_key = (
42
+ access_key if access_key else os.getenv("VOLCENGINE_ACCESS_KEY", "")
43
+ )
44
+ self.secret_key = (
45
+ secret_key if secret_key else os.getenv("VOLCENGINE_SECRET_KEY", "")
46
+ )
47
+ self.region = region
48
+
49
+ self._client = TLSService(
50
+ endpoint=f"https://tls-{self.region}.volces.com",
51
+ access_key_id=self.access_key,
52
+ access_key_secret=self.secret_key,
53
+ region=self.region,
54
+ )
55
+
56
+ def get_project_id_by_name(self, project_name: str) -> str:
57
+ """Get the ID of a log project by its name.
58
+
59
+ Args:
60
+ project_name (str): The name of the log project.
61
+
62
+ Returns:
63
+ str: The ID of the log project, or None if not found.
64
+ """
65
+ logger.info(f"Getting ID for log project '{project_name}' in TLS...")
66
+
67
+ request_body = {
68
+ "ProjectName": project_name,
69
+ "IsFullName": True,
70
+ }
71
+
72
+ try:
73
+ res = None
74
+ res = self._ve_tls_request(
75
+ client=self._client,
76
+ api="DescribeProjects",
77
+ body=request_body,
78
+ method="GET",
79
+ )
80
+ projects = res["Projects"]
81
+ for project in projects:
82
+ if project["ProjectName"] == project_name:
83
+ return project["ProjectId"]
84
+ return "<no_project_id_found>"
85
+ except KeyError:
86
+ raise ValueError(f"Failed to get log project ID: {res}")
87
+
88
+ def create_log_project(self, project_name: str) -> str:
89
+ """Create a log project in TLS.
90
+
91
+ Args:
92
+ project_name (str): The name of the log project to create.
93
+
94
+ Returns:
95
+ str: The ID of the created log project.
96
+ """
97
+ logger.info(f"Creating log project '{project_name}' in TLS...")
98
+
99
+ request_body = {
100
+ "ProjectName": project_name,
101
+ "Region": self.region,
102
+ "Description": "Created by Volcengine Agent Development Kit (VeADK)",
103
+ "Tags": [{"Key": "provider", "Value": "VeADK"}],
104
+ }
105
+ try:
106
+ res = self._ve_tls_request(
107
+ client=self._client, api="CreateProject", body=request_body
108
+ )
109
+
110
+ if res["ErrorCode"] == "ProjectAlreadyExist":
111
+ logger.debug(
112
+ f"Log project '{project_name}' already exists. Check its ID."
113
+ )
114
+ return self.get_project_id_by_name(project_name)
115
+
116
+ return res["ProjectId"]
117
+ except KeyError:
118
+ raise ValueError(f"Failed to create log project: {res}")
119
+
120
+ def get_trace_instance_by_name(self, log_project_id: str, trace_instance_name: str):
121
+ logger.info(f"Getting trace instance '{trace_instance_name}' in TLS...")
122
+
123
+ request_body = {
124
+ "PageSize": 100,
125
+ "ProjectId": log_project_id,
126
+ "TraceInstanceName": trace_instance_name,
127
+ }
128
+ try:
129
+ res = self._ve_tls_request(
130
+ client=self._client,
131
+ api="DescribeTraceInstances",
132
+ body=request_body,
133
+ method="GET",
134
+ )
135
+
136
+ for instance in res["TraceInstances"]:
137
+ if instance["TraceInstanceName"] == trace_instance_name:
138
+ return instance
139
+ except KeyError:
140
+ raise ValueError(f"Failed to create log project: {res}")
141
+
142
+ def create_tracing_instance(self, log_project_id: str, trace_instance_name: str):
143
+ """Create a tracing instance in TLS.
144
+
145
+ Args:
146
+ instance_name (str): The name of the tracing instance to create.
147
+
148
+ Returns:
149
+ dict: The tracing instance.
150
+ """
151
+ logger.info(f"Creating tracing instance '{trace_instance_name}' in TLS...")
152
+
153
+ request_body = {
154
+ "ProjectId": log_project_id,
155
+ "TraceInstanceName": trace_instance_name,
156
+ "Description": "Created by Volcengine Agent Development Kit (VeADK)",
157
+ }
158
+
159
+ try:
160
+ res = None
161
+ res = self._ve_tls_request(
162
+ client=self._client,
163
+ api="CreateTraceInstance",
164
+ body=request_body,
165
+ )
166
+
167
+ if res["ErrorCode"] == "TopicAlreadyExist":
168
+ logger.debug(
169
+ f"Log project '{trace_instance_name}' already exists. Check its ID."
170
+ )
171
+ return self.get_trace_instance_by_name(
172
+ log_project_id, trace_instance_name
173
+ )
174
+
175
+ # after creation, get the trace instance details
176
+ res = self._ve_tls_request(
177
+ client=self._client,
178
+ api="DescribeTraceInstance",
179
+ body={"TraceInstanceID": res["TraceInstanceID"]},
180
+ method="GET",
181
+ )
182
+
183
+ return res
184
+ except KeyError:
185
+ raise ValueError(f"Failed to create tracing instance: {res}")
186
+
187
+ def get_trace_topic_id(self):
188
+ """Get the trace topic ID under VeADK default names.
189
+
190
+ This method is a tool function just designed for `veadk/config.py`.
191
+
192
+ Returns:
193
+ str: The trace topic ID.
194
+ """
195
+ logger.info("Getting trace topic ID for tracing instance in TLS...")
196
+
197
+ log_project_id = self.create_log_project(DEFAULT_TLS_LOG_PROJECT_NAME)
198
+
199
+ instance = self.create_tracing_instance(
200
+ log_project_id, DEFAULT_TLS_TRACING_INSTANCE_NAME
201
+ )
202
+
203
+ if not instance:
204
+ raise ValueError("None instance")
205
+
206
+ logger.info(f"Fetched trace topic id: {instance['TraceTopicId']}")
207
+
208
+ return instance["TraceTopicId"]
@@ -12,75 +12,73 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import os
16
- from veadk.config import getenv
17
- from veadk.utils.logger import get_logger
18
15
  import asyncio
19
- from typing import Union
20
- from pydantic import BaseModel, Field
21
- from typing import Any
22
- from urllib.parse import urlparse
16
+ import os
23
17
  from datetime import datetime
18
+ from typing import TYPE_CHECKING, Union
19
+ from urllib.parse import urlparse
20
+
21
+ from veadk.consts import DEFAULT_TOS_BUCKET_NAME
22
+ from veadk.utils.logger import get_logger
23
+ from veadk.utils.misc import getenv
24
+
25
+ if TYPE_CHECKING:
26
+ pass
27
+
24
28
 
25
29
  # Initialize logger before using it
26
30
  logger = get_logger(__name__)
27
31
 
28
- # Try to import tos module, and provide helpful error message if it fails
29
- try:
30
- import tos
31
- except ImportError as e:
32
- logger.error(
33
- "Failed to import 'tos' module. Please install it using: pip install tos\n"
34
- )
35
- raise ImportError(
36
- "Missing 'tos' module. Please install it using: pip install tos\n"
37
- ) from e
38
-
39
-
40
- class TOSConfig(BaseModel):
41
- region: str = Field(
42
- default_factory=lambda: getenv("DATABASE_TOS_REGION"),
43
- description="TOS region",
44
- )
45
- ak: str = Field(
46
- default_factory=lambda: getenv("VOLCENGINE_ACCESS_KEY"),
47
- description="Volcengine access key",
48
- )
49
- sk: str = Field(
50
- default_factory=lambda: getenv("VOLCENGINE_SECRET_KEY"),
51
- description="Volcengine secret key",
52
- )
53
- bucket_name: str = Field(
54
- default_factory=lambda: getenv("DATABASE_TOS_BUCKET"),
55
- description="TOS bucket name",
56
- )
57
-
58
-
59
- class VeTOS(BaseModel):
60
- config: TOSConfig = Field(default_factory=TOSConfig)
61
-
62
- def model_post_init(self, __context: Any) -> None:
32
+
33
+ class VeTOS:
34
+ def __init__(
35
+ self,
36
+ ak: str = "",
37
+ sk: str = "",
38
+ region: str = "cn-beijing",
39
+ bucket_name: str = DEFAULT_TOS_BUCKET_NAME,
40
+ ) -> None:
41
+ self.ak = ak if ak else os.getenv("VOLCENGINE_ACCESS_KEY", "")
42
+ self.sk = sk if sk else os.getenv("VOLCENGINE_SECRET_KEY", "")
43
+ self.region = region
44
+ self.bucket_name = (
45
+ bucket_name if bucket_name else getenv("", DEFAULT_TOS_BUCKET_NAME)
46
+ )
47
+ self._tos_module = None
48
+
49
+ try:
50
+ import tos
51
+
52
+ self._tos_module = tos
53
+ except ImportError as e:
54
+ logger.error(
55
+ "Failed to import 'tos' module. Please install it using: pip install tos\n"
56
+ )
57
+ raise ImportError(
58
+ "Missing 'tos' module. Please install it using: pip install tos\n"
59
+ ) from e
60
+
61
+ self._client = None
63
62
  try:
64
- self._client = tos.TosClientV2(
65
- self.config.ak,
66
- self.config.sk,
67
- endpoint=f"tos-{self.config.region}.volces.com",
68
- region=self.config.region,
63
+ self._client = self._tos_module.TosClientV2(
64
+ ak=self.ak,
65
+ sk=self.sk,
66
+ endpoint=f"tos-{self.region}.volces.com",
67
+ region=self.region,
69
68
  )
70
- logger.info("Connected to TOS successfully.")
69
+ logger.info("Init TOS client.")
71
70
  except Exception as e:
72
71
  logger.error(f"Client initialization failed:{e}")
73
- self._client = None
74
72
 
75
73
  def _refresh_client(self):
76
74
  try:
77
75
  if self._client:
78
76
  self._client.close()
79
- self._client = tos.TosClientV2(
80
- self.config.ak,
81
- self.config.sk,
82
- endpoint=f"tos-{self.config.region}.volces.com",
83
- region=self.config.region,
77
+ self._client = self._tos_module.TosClientV2(
78
+ self.ak,
79
+ self.sk,
80
+ endpoint=f"tos-{self.region}.volces.com",
81
+ region=self.region,
84
82
  )
85
83
  logger.info("refreshed client successfully.")
86
84
  except Exception as e:
@@ -93,19 +91,17 @@ class VeTOS(BaseModel):
93
91
  logger.error("TOS client is not initialized")
94
92
  return False
95
93
  try:
96
- self._client.head_bucket(self.config.bucket_name)
97
- logger.info(f"Bucket {self.config.bucket_name} already exists")
98
- except tos.exceptions.TosServerError as e:
94
+ self._client.head_bucket(self.bucket_name)
95
+ logger.info(f"Bucket {self.bucket_name} already exists")
96
+ except self._tos_module.exceptions.TosServerError as e:
99
97
  if e.status_code == 404:
100
98
  try:
101
99
  self._client.create_bucket(
102
- bucket=self.config.bucket_name,
103
- storage_class=tos.StorageClassType.Storage_Class_Standard,
104
- acl=tos.ACLType.ACL_Public_Read, # 公开读
105
- )
106
- logger.info(
107
- f"Bucket {self.config.bucket_name} created successfully"
100
+ bucket=self.bucket_name,
101
+ storage_class=self._tos_module.StorageClassType.Storage_Class_Standard,
102
+ acl=self._tos_module.ACLType.ACL_Public_Read,
108
103
  )
104
+ logger.info(f"Bucket {self.bucket_name} created successfully")
109
105
  self._refresh_client()
110
106
  except Exception as create_error:
111
107
  logger.error(f"Bucket creation failed: {str(create_error)}")
@@ -117,7 +113,7 @@ class VeTOS(BaseModel):
117
113
  logger.error(f"Bucket check failed: {str(e)}")
118
114
  return False
119
115
 
120
- # 确保在所有路径上返回布尔值
116
+ # ensure return bool type
121
117
  return self._set_cors_rules()
122
118
 
123
119
  def _set_cors_rules(self) -> bool:
@@ -125,20 +121,18 @@ class VeTOS(BaseModel):
125
121
  logger.error("TOS client is not initialized")
126
122
  return False
127
123
  try:
128
- rule = tos.models2.CORSRule(
124
+ rule = self._tos_module.models2.CORSRule(
129
125
  allowed_origins=["*"],
130
126
  allowed_methods=["GET", "HEAD"],
131
127
  allowed_headers=["*"],
132
128
  max_age_seconds=1000,
133
129
  )
134
- self._client.put_bucket_cors(self.config.bucket_name, [rule])
135
- logger.info(
136
- f"CORS rules for bucket {self.config.bucket_name} set successfully"
137
- )
130
+ self._client.put_bucket_cors(self.bucket_name, [rule])
131
+ logger.info(f"CORS rules for bucket {self.bucket_name} set successfully")
138
132
  return True
139
133
  except Exception as e:
140
134
  logger.error(
141
- f"Failed to set CORS rules for bucket {self.config.bucket_name}: {str(e)}"
135
+ f"Failed to set CORS rules for bucket {self.bucket_name}: {str(e)}"
142
136
  )
143
137
  return False
144
138
 
@@ -155,7 +149,9 @@ class VeTOS(BaseModel):
155
149
 
156
150
  timestamp: str = datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3]
157
151
  object_key: str = f"{app_name}-{user_id}-{session_id}/{timestamp}-{file_name}"
158
- tos_url: str = f"https://{self.config.bucket_name}.tos-{self.config.region}.volces.com/{object_key}"
152
+ tos_url: str = (
153
+ f"https://{self.bucket_name}.tos-{self.region}.volces.com/{object_key}"
154
+ )
159
155
 
160
156
  return object_key, tos_url
161
157
 
@@ -182,9 +178,9 @@ class VeTOS(BaseModel):
182
178
  if not self.create_bucket():
183
179
  return
184
180
  self._client.put_object(
185
- bucket=self.config.bucket_name, key=object_key, content=data
181
+ bucket=self.bucket_name, key=object_key, content=data
186
182
  )
187
- logger.debug(f"Upload success, object_key: {object_key}")
183
+ logger.debug(f"Upload success, url: {object_key}")
188
184
  self._close()
189
185
  return
190
186
  except Exception as e:
@@ -199,7 +195,7 @@ class VeTOS(BaseModel):
199
195
  if not self.create_bucket():
200
196
  return
201
197
  self._client.put_object_from_file(
202
- bucket=self.config.bucket_name, key=object_key, file_path=file_path
198
+ bucket=self.bucket_name, key=object_key, file_path=file_path
203
199
  )
204
200
  self._close()
205
201
  logger.debug(f"Upload success, object_key: {object_key}")
@@ -215,7 +211,7 @@ class VeTOS(BaseModel):
215
211
  logger.error("TOS client is not initialized")
216
212
  return False
217
213
  try:
218
- object_stream = self._client.get_object(self.config.bucket_name, object_key)
214
+ object_stream = self._client.get_object(self.bucket_name, object_key)
219
215
 
220
216
  save_dir = os.path.dirname(save_path)
221
217
  if save_dir and not os.path.exists(save_dir):
@@ -0,0 +1,13 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,59 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABC, abstractmethod
16
+
17
+ from pydantic import BaseModel
18
+
19
+
20
class BaseKnowledgebaseBackend(ABC, BaseModel):
    # Abstract contract shared by every knowledgebase storage backend.
    # Concrete backends must implement all abstract methods below.
    # (Kept as a comment rather than a class docstring so the pydantic model
    # description is left untouched.)

    index: str
    """Index or collection name of the vector storage."""

    @abstractmethod
    def precheck_index_naming(self) -> None:
        """Validate ``self.index`` against the backend's naming rules.

        Implementations raise an exception when the name is not acceptable.
        """

    @abstractmethod
    def add_from_directory(self, directory: str, **kwargs) -> bool:
        """Ingest knowledge from the given directory path."""

    @abstractmethod
    def add_from_files(self, files: list[str], **kwargs) -> bool:
        """Ingest knowledge from the given list of file paths."""

    @abstractmethod
    def add_from_text(self, text: str | list[str], **kwargs) -> bool:
        """Ingest knowledge from one raw text string or a list of them."""

    @abstractmethod
    def search(self, **kwargs) -> list:
        """Query the knowledgebase and return the matching entries."""

    # Optional operations reserved for future use (not part of the contract
    # yet):
    #   - delete(**kwargs) -> bool:       delete a collection or documents
    #   - list_docs(**kwargs) -> None:    list original documents
    #   - list_chunks(**kwargs) -> None:  list embedded document chunks
@@ -0,0 +1,82 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex
16
+ from llama_index.core.schema import BaseNode
17
+ from llama_index.embeddings.openai_like import OpenAILikeEmbedding
18
+ from pydantic import Field
19
+ from typing_extensions import Any, override
20
+
21
+ from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
22
+ from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
23
+ from veadk.knowledgebase.backends.utils import get_llama_index_splitter
24
+
25
+
26
class InMemoryKnowledgeBackend(BaseKnowledgebaseBackend):
    # Knowledgebase backend backed by a llama-index ``VectorStoreIndex`` that
    # is created empty in ``model_post_init`` with no external store
    # configured. Documents are split into nodes and inserted into that index.
    # (Kept as a comment rather than a class docstring so the pydantic model
    # description is left untouched.)

    embedding_config: NormalEmbeddingModelConfig | EmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )
    """Embedding model configs"""

    def model_post_init(self, __context: Any) -> None:
        """Create the embedding model and the initially empty vector index."""
        self._embed_model = OpenAILikeEmbedding(
            model_name=self.embedding_config.name,
            api_key=self.embedding_config.api_key,
            api_base=self.embedding_config.api_base,
        )
        self._vector_index = VectorStoreIndex([], embed_model=self._embed_model)

    @override
    def precheck_index_naming(self) -> None:
        """No-op: this backend imposes no index naming rule."""

    @override
    def add_from_directory(self, directory: str, **kwargs) -> bool:
        """Load every document under ``directory`` and index it.

        Args:
            directory (str): Directory passed to ``SimpleDirectoryReader``.
            **kwargs: Accepted to match the base-class signature; ignored.

        Returns:
            bool: Always ``True`` once the nodes have been inserted.
        """
        documents = SimpleDirectoryReader(input_dir=directory).load_data()
        return self._index_documents(documents)

    @override
    def add_from_files(self, files: list[str], **kwargs) -> bool:
        """Load the given files and index them.

        Args:
            files (list[str]): File paths passed to ``SimpleDirectoryReader``.
            **kwargs: Accepted to match the base-class signature; ignored.

        Returns:
            bool: Always ``True`` once the nodes have been inserted.
        """
        documents = SimpleDirectoryReader(input_files=files).load_data()
        return self._index_documents(documents)

    @override
    def add_from_text(self, text: str | list[str], **kwargs) -> bool:
        """Index one text string or a list of text strings.

        Args:
            text (str | list[str]): A single text or several texts; each text
                becomes one ``Document`` before splitting.
            **kwargs: Accepted to match the base-class signature; ignored.

        Returns:
            bool: Always ``True`` once the nodes have been inserted.
        """
        texts = [text] if isinstance(text, str) else text
        documents = [Document(text=t) for t in texts]
        return self._index_documents(documents)

    @override
    def search(self, query: str, top_k: int = 5, **kwargs) -> list[str]:
        """Retrieve the text of the nodes most similar to ``query``.

        Args:
            query (str): Query string handed to the retriever.
            top_k (int): ``similarity_top_k`` passed to the retriever.
            **kwargs: Accepted to match the base-class signature; ignored.

        Returns:
            list[str]: The ``text`` of each retrieved node.
        """
        _retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        retrieved_nodes = _retriever.retrieve(query)
        return [node.text for node in retrieved_nodes]

    def _index_documents(self, documents: list[Document]) -> bool:
        """Split ``documents`` into nodes and insert them into the index."""
        nodes = self._split_documents(documents)
        self._vector_index.insert_nodes(nodes)
        return True

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split document into chunks.

        The splitter is chosen per document from its ``file_path`` metadata
        (empty string when the document has none, e.g. raw text input).
        """
        nodes = []
        for document in documents:
            splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
            nodes.extend(splitter.get_nodes_from_documents([document]))
        return nodes