veadk-python 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of veadk-python might be problematic. Click here for more details.
- veadk/agent.py +11 -18
- veadk/agent_builder.py +94 -0
- veadk/{database/__init__.py → auth/base_auth.py} +7 -2
- veadk/auth/veauth/apmplus_veauth.py +65 -0
- veadk/auth/veauth/ark_veauth.py +77 -0
- veadk/auth/veauth/base_veauth.py +50 -0
- veadk/auth/veauth/opensearch_veauth.py +75 -0
- veadk/auth/veauth/postgresql_veauth.py +75 -0
- veadk/auth/veauth/prompt_pilot_veauth.py +60 -0
- veadk/auth/veauth/vesearch_veauth.py +62 -0
- veadk/cli/cli.py +4 -0
- veadk/cli/cli_deploy.py +3 -2
- veadk/cli/cli_eval.py +160 -0
- veadk/cli/cli_init.py +1 -1
- veadk/cli/cli_pipeline.py +220 -0
- veadk/cli/cli_prompt.py +4 -4
- veadk/cli/cli_web.py +3 -1
- veadk/config.py +45 -81
- veadk/configs/database_configs.py +117 -0
- veadk/configs/model_configs.py +74 -0
- veadk/configs/tool_configs.py +42 -0
- veadk/configs/tracing_configs.py +110 -0
- veadk/consts.py +13 -1
- veadk/evaluation/base_evaluator.py +60 -44
- veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +18 -12
- veadk/evaluation/eval_set_recorder.py +2 -2
- veadk/integrations/ve_code_pipeline/__init__.py +13 -0
- veadk/integrations/ve_code_pipeline/ve_code_pipeline.py +431 -0
- veadk/integrations/ve_cozeloop/__init__.py +13 -0
- veadk/integrations/ve_cozeloop/ve_cozeloop.py +96 -0
- veadk/integrations/ve_cr/ve_cr.py +20 -5
- veadk/integrations/ve_faas/template/cookiecutter.json +1 -1
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/deploy.py +2 -2
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/agent.py +1 -1
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/run.sh +1 -5
- veadk/integrations/ve_faas/ve_faas.py +351 -36
- veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +6 -3
- veadk/integrations/ve_tls/__init__.py +13 -0
- veadk/integrations/ve_tls/utils.py +117 -0
- veadk/integrations/ve_tls/ve_tls.py +208 -0
- veadk/integrations/ve_tos/ve_tos.py +71 -75
- veadk/knowledgebase/backends/__init__.py +13 -0
- veadk/knowledgebase/backends/base_backend.py +59 -0
- veadk/knowledgebase/backends/in_memory_backend.py +82 -0
- veadk/knowledgebase/backends/opensearch_backend.py +136 -0
- veadk/knowledgebase/backends/redis_backend.py +144 -0
- veadk/knowledgebase/backends/utils.py +91 -0
- veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +412 -0
- veadk/knowledgebase/knowledgebase.py +109 -55
- veadk/memory/__init__.py +22 -0
- veadk/memory/long_term_memory.py +120 -51
- veadk/memory/long_term_memory_backends/__init__.py +13 -0
- veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
- veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
- veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
- veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
- veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
- veadk/memory/short_term_memory.py +80 -72
- veadk/memory/short_term_memory_backends/__init__.py +13 -0
- veadk/memory/short_term_memory_backends/base_backend.py +31 -0
- veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
- veadk/memory/short_term_memory_processor.py +9 -4
- veadk/runner.py +204 -247
- veadk/tools/builtin_tools/vesearch.py +2 -2
- veadk/tools/builtin_tools/video_generate.py +27 -20
- veadk/tools/builtin_tools/web_scraper.py +1 -1
- veadk/tools/builtin_tools/web_search.py +7 -7
- veadk/tools/load_knowledgebase_tool.py +1 -1
- veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +20 -2
- veadk/tracing/telemetry/exporters/apmplus_exporter.py +178 -14
- veadk/tracing/telemetry/exporters/cozeloop_exporter.py +6 -9
- veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
- veadk/tracing/telemetry/exporters/tls_exporter.py +6 -10
- veadk/tracing/telemetry/opentelemetry_tracer.py +5 -8
- veadk/tracing/telemetry/telemetry.py +66 -60
- veadk/utils/logger.py +1 -1
- veadk/utils/misc.py +63 -0
- veadk/utils/volcengine_sign.py +6 -2
- veadk/version.py +1 -1
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/METADATA +16 -3
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/RECORD +93 -64
- veadk/database/database_adapter.py +0 -368
- veadk/database/database_factory.py +0 -80
- veadk/database/kv/redis_database.py +0 -159
- veadk/database/local_database.py +0 -61
- veadk/database/relational/mysql_database.py +0 -173
- veadk/database/vector/opensearch_vector_database.py +0 -263
- veadk/database/vector/type.py +0 -50
- veadk/database/viking/viking_database.py +0 -471
- veadk/database/viking/viking_memory_db.py +0 -525
- /veadk/{database/kv → auth}/__init__.py +0 -0
- /veadk/{database/relational → auth/veauth}/__init__.py +0 -0
- /veadk/{database/vector/__init__.py → auth/veauth/cozeloop_veauth.py} +0 -0
- /veadk/{database/viking → configs}/__init__.py +0 -0
- /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/__init__.py +0 -0
- /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/agent.py +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/WHEEL +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/entry_points.txt +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
from veadk.consts import DEFAULT_TLS_LOG_PROJECT_NAME, DEFAULT_TLS_TRACING_INSTANCE_NAME
|
|
18
|
+
from veadk.utils.logger import get_logger
|
|
19
|
+
|
|
20
|
+
logger = get_logger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class VeTLS:
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
access_key: str | None = None,
|
|
27
|
+
secret_key: str | None = None,
|
|
28
|
+
region: str = "cn-beijing",
|
|
29
|
+
):
|
|
30
|
+
try:
|
|
31
|
+
from volcengine.tls.TLSService import TLSService
|
|
32
|
+
|
|
33
|
+
from veadk.integrations.ve_tls.utils import ve_tls_request
|
|
34
|
+
except ImportError:
|
|
35
|
+
raise ImportError(
|
|
36
|
+
"Please install volcengine SDK before init VeTLS: pip install volcengine"
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
self._ve_tls_request = ve_tls_request
|
|
40
|
+
|
|
41
|
+
self.access_key = (
|
|
42
|
+
access_key if access_key else os.getenv("VOLCENGINE_ACCESS_KEY", "")
|
|
43
|
+
)
|
|
44
|
+
self.secret_key = (
|
|
45
|
+
secret_key if secret_key else os.getenv("VOLCENGINE_SECRET_KEY", "")
|
|
46
|
+
)
|
|
47
|
+
self.region = region
|
|
48
|
+
|
|
49
|
+
self._client = TLSService(
|
|
50
|
+
endpoint=f"https://tls-{self.region}.volces.com",
|
|
51
|
+
access_key_id=self.access_key,
|
|
52
|
+
access_key_secret=self.secret_key,
|
|
53
|
+
region=self.region,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
def get_project_id_by_name(self, project_name: str) -> str:
|
|
57
|
+
"""Get the ID of a log project by its name.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
project_name (str): The name of the log project.
|
|
61
|
+
|
|
62
|
+
Returns:
|
|
63
|
+
str: The ID of the log project, or None if not found.
|
|
64
|
+
"""
|
|
65
|
+
logger.info(f"Getting ID for log project '{project_name}' in TLS...")
|
|
66
|
+
|
|
67
|
+
request_body = {
|
|
68
|
+
"ProjectName": project_name,
|
|
69
|
+
"IsFullName": True,
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
try:
|
|
73
|
+
res = None
|
|
74
|
+
res = self._ve_tls_request(
|
|
75
|
+
client=self._client,
|
|
76
|
+
api="DescribeProjects",
|
|
77
|
+
body=request_body,
|
|
78
|
+
method="GET",
|
|
79
|
+
)
|
|
80
|
+
projects = res["Projects"]
|
|
81
|
+
for project in projects:
|
|
82
|
+
if project["ProjectName"] == project_name:
|
|
83
|
+
return project["ProjectId"]
|
|
84
|
+
return "<no_project_id_found>"
|
|
85
|
+
except KeyError:
|
|
86
|
+
raise ValueError(f"Failed to get log project ID: {res}")
|
|
87
|
+
|
|
88
|
+
def create_log_project(self, project_name: str) -> str:
|
|
89
|
+
"""Create a log project in TLS.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
project_name (str): The name of the log project to create.
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
str: The ID of the created log project.
|
|
96
|
+
"""
|
|
97
|
+
logger.info(f"Creating log project '{project_name}' in TLS...")
|
|
98
|
+
|
|
99
|
+
request_body = {
|
|
100
|
+
"ProjectName": project_name,
|
|
101
|
+
"Region": self.region,
|
|
102
|
+
"Description": "Created by Volcengine Agent Development Kit (VeADK)",
|
|
103
|
+
"Tags": [{"Key": "provider", "Value": "VeADK"}],
|
|
104
|
+
}
|
|
105
|
+
try:
|
|
106
|
+
res = self._ve_tls_request(
|
|
107
|
+
client=self._client, api="CreateProject", body=request_body
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
if res["ErrorCode"] == "ProjectAlreadyExist":
|
|
111
|
+
logger.debug(
|
|
112
|
+
f"Log project '{project_name}' already exists. Check its ID."
|
|
113
|
+
)
|
|
114
|
+
return self.get_project_id_by_name(project_name)
|
|
115
|
+
|
|
116
|
+
return res["ProjectId"]
|
|
117
|
+
except KeyError:
|
|
118
|
+
raise ValueError(f"Failed to create log project: {res}")
|
|
119
|
+
|
|
120
|
+
def get_trace_instance_by_name(self, log_project_id: str, trace_instance_name: str):
|
|
121
|
+
logger.info(f"Getting trace instance '{trace_instance_name}' in TLS...")
|
|
122
|
+
|
|
123
|
+
request_body = {
|
|
124
|
+
"PageSize": 100,
|
|
125
|
+
"ProjectId": log_project_id,
|
|
126
|
+
"TraceInstanceName": trace_instance_name,
|
|
127
|
+
}
|
|
128
|
+
try:
|
|
129
|
+
res = self._ve_tls_request(
|
|
130
|
+
client=self._client,
|
|
131
|
+
api="DescribeTraceInstances",
|
|
132
|
+
body=request_body,
|
|
133
|
+
method="GET",
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
for instance in res["TraceInstances"]:
|
|
137
|
+
if instance["TraceInstanceName"] == trace_instance_name:
|
|
138
|
+
return instance
|
|
139
|
+
except KeyError:
|
|
140
|
+
raise ValueError(f"Failed to create log project: {res}")
|
|
141
|
+
|
|
142
|
+
def create_tracing_instance(self, log_project_id: str, trace_instance_name: str):
|
|
143
|
+
"""Create a tracing instance in TLS.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
instance_name (str): The name of the tracing instance to create.
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
dict: The tracing instance.
|
|
150
|
+
"""
|
|
151
|
+
logger.info(f"Creating tracing instance '{trace_instance_name}' in TLS...")
|
|
152
|
+
|
|
153
|
+
request_body = {
|
|
154
|
+
"ProjectId": log_project_id,
|
|
155
|
+
"TraceInstanceName": trace_instance_name,
|
|
156
|
+
"Description": "Created by Volcengine Agent Development Kit (VeADK)",
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
try:
|
|
160
|
+
res = None
|
|
161
|
+
res = self._ve_tls_request(
|
|
162
|
+
client=self._client,
|
|
163
|
+
api="CreateTraceInstance",
|
|
164
|
+
body=request_body,
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
if res["ErrorCode"] == "TopicAlreadyExist":
|
|
168
|
+
logger.debug(
|
|
169
|
+
f"Log project '{trace_instance_name}' already exists. Check its ID."
|
|
170
|
+
)
|
|
171
|
+
return self.get_trace_instance_by_name(
|
|
172
|
+
log_project_id, trace_instance_name
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
# after creation, get the trace instance details
|
|
176
|
+
res = self._ve_tls_request(
|
|
177
|
+
client=self._client,
|
|
178
|
+
api="DescribeTraceInstance",
|
|
179
|
+
body={"TraceInstanceID": res["TraceInstanceID"]},
|
|
180
|
+
method="GET",
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
return res
|
|
184
|
+
except KeyError:
|
|
185
|
+
raise ValueError(f"Failed to create tracing instance: {res}")
|
|
186
|
+
|
|
187
|
+
def get_trace_topic_id(self):
|
|
188
|
+
"""Get the trace topic ID under VeADK default names.
|
|
189
|
+
|
|
190
|
+
This method is a tool function just designed for `veadk/config.py`.
|
|
191
|
+
|
|
192
|
+
Returns:
|
|
193
|
+
str: The trace topic ID.
|
|
194
|
+
"""
|
|
195
|
+
logger.info("Getting trace topic ID for tracing instance in TLS...")
|
|
196
|
+
|
|
197
|
+
log_project_id = self.create_log_project(DEFAULT_TLS_LOG_PROJECT_NAME)
|
|
198
|
+
|
|
199
|
+
instance = self.create_tracing_instance(
|
|
200
|
+
log_project_id, DEFAULT_TLS_TRACING_INSTANCE_NAME
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
if not instance:
|
|
204
|
+
raise ValueError("None instance")
|
|
205
|
+
|
|
206
|
+
logger.info(f"Fetched trace topic id: {instance['TraceTopicId']}")
|
|
207
|
+
|
|
208
|
+
return instance["TraceTopicId"]
|
|
@@ -12,75 +12,73 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import os
|
|
16
|
-
from veadk.config import getenv
|
|
17
|
-
from veadk.utils.logger import get_logger
|
|
18
15
|
import asyncio
|
|
19
|
-
|
|
20
|
-
from pydantic import BaseModel, Field
|
|
21
|
-
from typing import Any
|
|
22
|
-
from urllib.parse import urlparse
|
|
16
|
+
import os
|
|
23
17
|
from datetime import datetime
|
|
18
|
+
from typing import TYPE_CHECKING, Union
|
|
19
|
+
from urllib.parse import urlparse
|
|
20
|
+
|
|
21
|
+
from veadk.consts import DEFAULT_TOS_BUCKET_NAME
|
|
22
|
+
from veadk.utils.logger import get_logger
|
|
23
|
+
from veadk.utils.misc import getenv
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
pass
|
|
27
|
+
|
|
24
28
|
|
|
25
29
|
# Initialize logger before using it
|
|
26
30
|
logger = get_logger(__name__)
|
|
27
31
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
class VeTOS(BaseModel):
|
|
60
|
-
config: TOSConfig = Field(default_factory=TOSConfig)
|
|
61
|
-
|
|
62
|
-
def model_post_init(self, __context: Any) -> None:
|
|
32
|
+
|
|
33
|
+
class VeTOS:
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
ak: str = "",
|
|
37
|
+
sk: str = "",
|
|
38
|
+
region: str = "cn-beijing",
|
|
39
|
+
bucket_name: str = DEFAULT_TOS_BUCKET_NAME,
|
|
40
|
+
) -> None:
|
|
41
|
+
self.ak = ak if ak else os.getenv("VOLCENGINE_ACCESS_KEY", "")
|
|
42
|
+
self.sk = sk if sk else os.getenv("VOLCENGINE_SECRET_KEY", "")
|
|
43
|
+
self.region = region
|
|
44
|
+
self.bucket_name = (
|
|
45
|
+
bucket_name if bucket_name else getenv("", DEFAULT_TOS_BUCKET_NAME)
|
|
46
|
+
)
|
|
47
|
+
self._tos_module = None
|
|
48
|
+
|
|
49
|
+
try:
|
|
50
|
+
import tos
|
|
51
|
+
|
|
52
|
+
self._tos_module = tos
|
|
53
|
+
except ImportError as e:
|
|
54
|
+
logger.error(
|
|
55
|
+
"Failed to import 'tos' module. Please install it using: pip install tos\n"
|
|
56
|
+
)
|
|
57
|
+
raise ImportError(
|
|
58
|
+
"Missing 'tos' module. Please install it using: pip install tos\n"
|
|
59
|
+
) from e
|
|
60
|
+
|
|
61
|
+
self._client = None
|
|
63
62
|
try:
|
|
64
|
-
self._client =
|
|
65
|
-
self.
|
|
66
|
-
self.
|
|
67
|
-
endpoint=f"tos-{self.
|
|
68
|
-
region=self.
|
|
63
|
+
self._client = self._tos_module.TosClientV2(
|
|
64
|
+
ak=self.ak,
|
|
65
|
+
sk=self.sk,
|
|
66
|
+
endpoint=f"tos-{self.region}.volces.com",
|
|
67
|
+
region=self.region,
|
|
69
68
|
)
|
|
70
|
-
logger.info("
|
|
69
|
+
logger.info("Init TOS client.")
|
|
71
70
|
except Exception as e:
|
|
72
71
|
logger.error(f"Client initialization failed:{e}")
|
|
73
|
-
self._client = None
|
|
74
72
|
|
|
75
73
|
def _refresh_client(self):
|
|
76
74
|
try:
|
|
77
75
|
if self._client:
|
|
78
76
|
self._client.close()
|
|
79
|
-
self._client =
|
|
80
|
-
self.
|
|
81
|
-
self.
|
|
82
|
-
endpoint=f"tos-{self.
|
|
83
|
-
region=self.
|
|
77
|
+
self._client = self._tos_module.TosClientV2(
|
|
78
|
+
self.ak,
|
|
79
|
+
self.sk,
|
|
80
|
+
endpoint=f"tos-{self.region}.volces.com",
|
|
81
|
+
region=self.region,
|
|
84
82
|
)
|
|
85
83
|
logger.info("refreshed client successfully.")
|
|
86
84
|
except Exception as e:
|
|
@@ -93,19 +91,17 @@ class VeTOS(BaseModel):
|
|
|
93
91
|
logger.error("TOS client is not initialized")
|
|
94
92
|
return False
|
|
95
93
|
try:
|
|
96
|
-
self._client.head_bucket(self.
|
|
97
|
-
logger.info(f"Bucket {self.
|
|
98
|
-
except
|
|
94
|
+
self._client.head_bucket(self.bucket_name)
|
|
95
|
+
logger.info(f"Bucket {self.bucket_name} already exists")
|
|
96
|
+
except self._tos_module.exceptions.TosServerError as e:
|
|
99
97
|
if e.status_code == 404:
|
|
100
98
|
try:
|
|
101
99
|
self._client.create_bucket(
|
|
102
|
-
bucket=self.
|
|
103
|
-
storage_class=
|
|
104
|
-
acl=
|
|
105
|
-
)
|
|
106
|
-
logger.info(
|
|
107
|
-
f"Bucket {self.config.bucket_name} created successfully"
|
|
100
|
+
bucket=self.bucket_name,
|
|
101
|
+
storage_class=self._tos_module.StorageClassType.Storage_Class_Standard,
|
|
102
|
+
acl=self._tos_module.ACLType.ACL_Public_Read,
|
|
108
103
|
)
|
|
104
|
+
logger.info(f"Bucket {self.bucket_name} created successfully")
|
|
109
105
|
self._refresh_client()
|
|
110
106
|
except Exception as create_error:
|
|
111
107
|
logger.error(f"Bucket creation failed: {str(create_error)}")
|
|
@@ -117,7 +113,7 @@ class VeTOS(BaseModel):
|
|
|
117
113
|
logger.error(f"Bucket check failed: {str(e)}")
|
|
118
114
|
return False
|
|
119
115
|
|
|
120
|
-
#
|
|
116
|
+
# ensure return bool type
|
|
121
117
|
return self._set_cors_rules()
|
|
122
118
|
|
|
123
119
|
def _set_cors_rules(self) -> bool:
|
|
@@ -125,20 +121,18 @@ class VeTOS(BaseModel):
|
|
|
125
121
|
logger.error("TOS client is not initialized")
|
|
126
122
|
return False
|
|
127
123
|
try:
|
|
128
|
-
rule =
|
|
124
|
+
rule = self._tos_module.models2.CORSRule(
|
|
129
125
|
allowed_origins=["*"],
|
|
130
126
|
allowed_methods=["GET", "HEAD"],
|
|
131
127
|
allowed_headers=["*"],
|
|
132
128
|
max_age_seconds=1000,
|
|
133
129
|
)
|
|
134
|
-
self._client.put_bucket_cors(self.
|
|
135
|
-
logger.info(
|
|
136
|
-
f"CORS rules for bucket {self.config.bucket_name} set successfully"
|
|
137
|
-
)
|
|
130
|
+
self._client.put_bucket_cors(self.bucket_name, [rule])
|
|
131
|
+
logger.info(f"CORS rules for bucket {self.bucket_name} set successfully")
|
|
138
132
|
return True
|
|
139
133
|
except Exception as e:
|
|
140
134
|
logger.error(
|
|
141
|
-
f"Failed to set CORS rules for bucket {self.
|
|
135
|
+
f"Failed to set CORS rules for bucket {self.bucket_name}: {str(e)}"
|
|
142
136
|
)
|
|
143
137
|
return False
|
|
144
138
|
|
|
@@ -155,7 +149,9 @@ class VeTOS(BaseModel):
|
|
|
155
149
|
|
|
156
150
|
timestamp: str = datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3]
|
|
157
151
|
object_key: str = f"{app_name}-{user_id}-{session_id}/{timestamp}-{file_name}"
|
|
158
|
-
tos_url: str =
|
|
152
|
+
tos_url: str = (
|
|
153
|
+
f"https://{self.bucket_name}.tos-{self.region}.volces.com/{object_key}"
|
|
154
|
+
)
|
|
159
155
|
|
|
160
156
|
return object_key, tos_url
|
|
161
157
|
|
|
@@ -182,9 +178,9 @@ class VeTOS(BaseModel):
|
|
|
182
178
|
if not self.create_bucket():
|
|
183
179
|
return
|
|
184
180
|
self._client.put_object(
|
|
185
|
-
bucket=self.
|
|
181
|
+
bucket=self.bucket_name, key=object_key, content=data
|
|
186
182
|
)
|
|
187
|
-
logger.debug(f"Upload success,
|
|
183
|
+
logger.debug(f"Upload success, url: {object_key}")
|
|
188
184
|
self._close()
|
|
189
185
|
return
|
|
190
186
|
except Exception as e:
|
|
@@ -199,7 +195,7 @@ class VeTOS(BaseModel):
|
|
|
199
195
|
if not self.create_bucket():
|
|
200
196
|
return
|
|
201
197
|
self._client.put_object_from_file(
|
|
202
|
-
bucket=self.
|
|
198
|
+
bucket=self.bucket_name, key=object_key, file_path=file_path
|
|
203
199
|
)
|
|
204
200
|
self._close()
|
|
205
201
|
logger.debug(f"Upload success, object_key: {object_key}")
|
|
@@ -215,7 +211,7 @@ class VeTOS(BaseModel):
|
|
|
215
211
|
logger.error("TOS client is not initialized")
|
|
216
212
|
return False
|
|
217
213
|
try:
|
|
218
|
-
object_stream = self._client.get_object(self.
|
|
214
|
+
object_stream = self._client.get_object(self.bucket_name, object_key)
|
|
219
215
|
|
|
220
216
|
save_dir = os.path.dirname(save_path)
|
|
221
217
|
if save_dir and not os.path.exists(save_dir):
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
|
|
17
|
+
from pydantic import BaseModel
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BaseKnowledgebaseBackend(ABC, BaseModel):
    """Abstract interface that every knowledgebase storage backend implements."""

    index: str
    """Index or collection name of the vector storage."""

    @abstractmethod
    def precheck_index_naming(self) -> None:
        """Validate the configured index name.

        Implementations raise an exception when the name is not acceptable.
        """

    @abstractmethod
    def add_from_directory(self, directory: str, **kwargs) -> bool:
        """Add the knowledge found under a directory to the knowledgebase."""

    @abstractmethod
    def add_from_files(self, files: list[str], **kwargs) -> bool:
        """Add the knowledge contained in the given files to the knowledgebase."""

    @abstractmethod
    def add_from_text(self, text: str | list[str], **kwargs) -> bool:
        """Add one text or a list of texts to the knowledgebase."""

    @abstractmethod
    def search(self, **kwargs) -> list:
        """Search the knowledgebase and return the matching entries."""

    # Optional methods reserved for future use:
    # - `delete`: Delete collection or documents
    # - `list_docs`: List original documents
    # - `list_chunks`: List embedded document chunks

    # def delete(self, **kwargs) -> bool:
    #     """Delete knowledge from knowledgebase"""

    # def list_docs(self, **kwargs) -> None:
    #     """List original documents in knowledgebase"""

    # def list_chunks(self, **kwargs) -> None:
    #     """List embedded document chunks in knowledgebase"""
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex
|
|
16
|
+
from llama_index.core.schema import BaseNode
|
|
17
|
+
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
|
|
18
|
+
from pydantic import Field
|
|
19
|
+
from typing_extensions import Any, override
|
|
20
|
+
|
|
21
|
+
from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
|
|
22
|
+
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
|
|
23
|
+
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class InMemoryKnowledgeBackend(BaseKnowledgebaseBackend):
    """Knowledgebase backend built on a llama-index ``VectorStoreIndex``.

    The index is created with no initial nodes in ``model_post_init`` and is
    filled by the ``add_from_*`` methods.
    """

    embedding_config: NormalEmbeddingModelConfig | EmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )
    """Embedding model configs"""

    def model_post_init(self, __context: Any) -> None:
        """Build the embedding model and the (initially empty) vector index."""
        self._embed_model = OpenAILikeEmbedding(
            model_name=self.embedding_config.name,
            api_key=self.embedding_config.api_key,
            api_base=self.embedding_config.api_base,
        )
        self._vector_index = VectorStoreIndex([], embed_model=self._embed_model)

    @override
    def precheck_index_naming(self) -> None:
        """No-op: this backend performs no index-name validation."""
        # Checking is not needed

    @override
    def add_from_directory(self, directory: str, **kwargs) -> bool:
        """Load every document under ``directory``, split it and index the chunks.

        Args:
            directory: Directory passed to ``SimpleDirectoryReader(input_dir=...)``.
            **kwargs: Accepted to match the base-class signature; ignored here.

        Returns:
            bool: Always ``True`` once the nodes have been inserted.
        """
        documents = SimpleDirectoryReader(input_dir=directory).load_data()
        self._vector_index.insert_nodes(self._split_documents(documents))
        return True

    @override
    def add_from_files(self, files: list[str], **kwargs) -> bool:
        """Load the given files, split them and index the chunks.

        Args:
            files: File paths passed to ``SimpleDirectoryReader(input_files=...)``.
            **kwargs: Accepted to match the base-class signature; ignored here.

        Returns:
            bool: Always ``True`` once the nodes have been inserted.
        """
        documents = SimpleDirectoryReader(input_files=files).load_data()
        self._vector_index.insert_nodes(self._split_documents(documents))
        return True

    @override
    def add_from_text(self, text: str | list[str], **kwargs) -> bool:
        """Index one text or a list of texts.

        Args:
            text: A single string, or a list of strings (one document each).
            **kwargs: Accepted to match the base-class signature; ignored here.

        Returns:
            bool: Always ``True`` once the nodes have been inserted.
        """
        texts = [text] if isinstance(text, str) else text
        documents = [Document(text=t) for t in texts]
        self._vector_index.insert_nodes(self._split_documents(documents))
        return True

    @override
    def search(self, query: str, top_k: int = 5) -> list[str]:
        """Retrieve the text of the ``top_k`` nodes most similar to ``query``."""
        _retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        retrieved_nodes = _retriever.retrieve(query)
        return [node.text for node in retrieved_nodes]

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split documents into chunks.

        The splitter is chosen per document from its ``file_path`` metadata
        (empty string when the document has none, e.g. raw text).
        """
        nodes = []
        for document in documents:
            splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
            nodes.extend(splitter.get_nodes_from_documents([document]))
        return nodes
|