veadk-python 0.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- veadk/__init__.py +37 -0
- veadk/a2a/__init__.py +13 -0
- veadk/a2a/agent_card.py +45 -0
- veadk/a2a/remote_ve_agent.py +390 -0
- veadk/a2a/utils/__init__.py +13 -0
- veadk/a2a/utils/agent_to_a2a.py +170 -0
- veadk/a2a/ve_a2a_server.py +93 -0
- veadk/a2a/ve_agent_executor.py +78 -0
- veadk/a2a/ve_middlewares.py +313 -0
- veadk/a2a/ve_task_store.py +37 -0
- veadk/agent.py +402 -0
- veadk/agent_builder.py +93 -0
- veadk/agents/loop_agent.py +68 -0
- veadk/agents/parallel_agent.py +72 -0
- veadk/agents/sequential_agent.py +64 -0
- veadk/auth/__init__.py +13 -0
- veadk/auth/base_auth.py +22 -0
- veadk/auth/ve_credential_service.py +203 -0
- veadk/auth/veauth/__init__.py +13 -0
- veadk/auth/veauth/apmplus_veauth.py +58 -0
- veadk/auth/veauth/ark_veauth.py +75 -0
- veadk/auth/veauth/base_veauth.py +50 -0
- veadk/auth/veauth/cozeloop_veauth.py +13 -0
- veadk/auth/veauth/opensearch_veauth.py +75 -0
- veadk/auth/veauth/postgresql_veauth.py +75 -0
- veadk/auth/veauth/prompt_pilot_veauth.py +60 -0
- veadk/auth/veauth/speech_veauth.py +54 -0
- veadk/auth/veauth/utils.py +69 -0
- veadk/auth/veauth/vesearch_veauth.py +62 -0
- veadk/auth/veauth/viking_mem0_veauth.py +91 -0
- veadk/cli/__init__.py +13 -0
- veadk/cli/cli.py +58 -0
- veadk/cli/cli_clean.py +87 -0
- veadk/cli/cli_create.py +163 -0
- veadk/cli/cli_deploy.py +233 -0
- veadk/cli/cli_eval.py +215 -0
- veadk/cli/cli_init.py +214 -0
- veadk/cli/cli_kb.py +110 -0
- veadk/cli/cli_pipeline.py +285 -0
- veadk/cli/cli_prompt.py +86 -0
- veadk/cli/cli_update.py +106 -0
- veadk/cli/cli_uploadevalset.py +139 -0
- veadk/cli/cli_web.py +143 -0
- veadk/cloud/__init__.py +13 -0
- veadk/cloud/cloud_agent_engine.py +485 -0
- veadk/cloud/cloud_app.py +475 -0
- veadk/config.py +115 -0
- veadk/configs/__init__.py +13 -0
- veadk/configs/auth_configs.py +133 -0
- veadk/configs/database_configs.py +132 -0
- veadk/configs/model_configs.py +78 -0
- veadk/configs/tool_configs.py +54 -0
- veadk/configs/tracing_configs.py +110 -0
- veadk/consts.py +74 -0
- veadk/evaluation/__init__.py +17 -0
- veadk/evaluation/adk_evaluator/__init__.py +17 -0
- veadk/evaluation/adk_evaluator/adk_evaluator.py +302 -0
- veadk/evaluation/base_evaluator.py +642 -0
- veadk/evaluation/deepeval_evaluator/__init__.py +17 -0
- veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +339 -0
- veadk/evaluation/eval_set_file_loader.py +48 -0
- veadk/evaluation/eval_set_recorder.py +146 -0
- veadk/evaluation/types.py +65 -0
- veadk/evaluation/utils/prometheus.py +196 -0
- veadk/integrations/__init__.py +13 -0
- veadk/integrations/ve_apig/__init__.py +13 -0
- veadk/integrations/ve_apig/ve_apig.py +349 -0
- veadk/integrations/ve_apig/ve_apig_utils.py +332 -0
- veadk/integrations/ve_code_pipeline/__init__.py +13 -0
- veadk/integrations/ve_code_pipeline/ve_code_pipeline.py +431 -0
- veadk/integrations/ve_cozeloop/__init__.py +13 -0
- veadk/integrations/ve_cozeloop/ve_cozeloop.py +96 -0
- veadk/integrations/ve_cr/__init__.py +13 -0
- veadk/integrations/ve_cr/ve_cr.py +220 -0
- veadk/integrations/ve_faas/__init__.py +13 -0
- veadk/integrations/ve_faas/template/cookiecutter.json +15 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/__init__.py +13 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/clean.py +23 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/config.yaml.example +6 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/deploy.py +106 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/__init__.py +13 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/agent.py +25 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/app.py +202 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/requirements.txt +3 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/run.sh +49 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{ cookiecutter.app_name }}/__init__.py +14 -0
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{ cookiecutter.app_name }}/agent.py +27 -0
- veadk/integrations/ve_faas/ve_faas.py +754 -0
- veadk/integrations/ve_faas/ve_faas_utils.py +408 -0
- veadk/integrations/ve_faas/web_template/cookiecutter.json +20 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/__init__.py +13 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/clean.py +23 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/config.yaml.example +2 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/deploy.py +44 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/Dockerfile +23 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/app.py +123 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/init_db.py +46 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/models.py +36 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/requirements.txt +4 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/run.sh +21 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/static/css/style.css +368 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/static/js/admin.js +0 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/admin/dashboard.html +21 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/admin/edit_post.html +24 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/admin/login.html +21 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/admin/posts.html +53 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/base.html +45 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/index.html +29 -0
- veadk/integrations/ve_faas/web_template/{{cookiecutter.local_dir_name}}/src/templates/post.html +14 -0
- veadk/integrations/ve_identity/__init__.py +110 -0
- veadk/integrations/ve_identity/auth_config.py +261 -0
- veadk/integrations/ve_identity/auth_mixins.py +650 -0
- veadk/integrations/ve_identity/auth_processor.py +385 -0
- veadk/integrations/ve_identity/function_tool.py +158 -0
- veadk/integrations/ve_identity/identity_client.py +864 -0
- veadk/integrations/ve_identity/mcp_tool.py +181 -0
- veadk/integrations/ve_identity/mcp_toolset.py +431 -0
- veadk/integrations/ve_identity/models.py +228 -0
- veadk/integrations/ve_identity/token_manager.py +188 -0
- veadk/integrations/ve_identity/utils.py +151 -0
- veadk/integrations/ve_prompt_pilot/__init__.py +13 -0
- veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +85 -0
- veadk/integrations/ve_tls/__init__.py +13 -0
- veadk/integrations/ve_tls/utils.py +116 -0
- veadk/integrations/ve_tls/ve_tls.py +212 -0
- veadk/integrations/ve_tos/ve_tos.py +710 -0
- veadk/integrations/ve_viking_db_memory/__init__.py +13 -0
- veadk/integrations/ve_viking_db_memory/ve_viking_db_memory.py +308 -0
- veadk/knowledgebase/__init__.py +17 -0
- veadk/knowledgebase/backends/__init__.py +13 -0
- veadk/knowledgebase/backends/base_backend.py +72 -0
- veadk/knowledgebase/backends/in_memory_backend.py +91 -0
- veadk/knowledgebase/backends/opensearch_backend.py +162 -0
- veadk/knowledgebase/backends/redis_backend.py +172 -0
- veadk/knowledgebase/backends/utils.py +92 -0
- veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +608 -0
- veadk/knowledgebase/entry.py +25 -0
- veadk/knowledgebase/knowledgebase.py +307 -0
- veadk/memory/__init__.py +35 -0
- veadk/memory/long_term_memory.py +365 -0
- veadk/memory/long_term_memory_backends/__init__.py +13 -0
- veadk/memory/long_term_memory_backends/base_backend.py +35 -0
- veadk/memory/long_term_memory_backends/in_memory_backend.py +67 -0
- veadk/memory/long_term_memory_backends/mem0_backend.py +155 -0
- veadk/memory/long_term_memory_backends/opensearch_backend.py +124 -0
- veadk/memory/long_term_memory_backends/redis_backend.py +140 -0
- veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +189 -0
- veadk/memory/short_term_memory.py +252 -0
- veadk/memory/short_term_memory_backends/__init__.py +13 -0
- veadk/memory/short_term_memory_backends/base_backend.py +31 -0
- veadk/memory/short_term_memory_backends/mysql_backend.py +49 -0
- veadk/memory/short_term_memory_backends/postgresql_backend.py +49 -0
- veadk/memory/short_term_memory_backends/sqlite_backend.py +55 -0
- veadk/memory/short_term_memory_processor.py +100 -0
- veadk/processors/__init__.py +26 -0
- veadk/processors/base_run_processor.py +120 -0
- veadk/prompts/__init__.py +13 -0
- veadk/prompts/agent_default_prompt.py +30 -0
- veadk/prompts/prompt_evaluator.py +20 -0
- veadk/prompts/prompt_memory_processor.py +55 -0
- veadk/prompts/prompt_optimization.py +150 -0
- veadk/runner.py +732 -0
- veadk/tools/__init__.py +13 -0
- veadk/tools/builtin_tools/__init__.py +13 -0
- veadk/tools/builtin_tools/agent_authorization.py +94 -0
- veadk/tools/builtin_tools/generate_image.py +23 -0
- veadk/tools/builtin_tools/image_edit.py +300 -0
- veadk/tools/builtin_tools/image_generate.py +446 -0
- veadk/tools/builtin_tools/lark.py +67 -0
- veadk/tools/builtin_tools/las.py +24 -0
- veadk/tools/builtin_tools/link_reader.py +66 -0
- veadk/tools/builtin_tools/llm_shield.py +381 -0
- veadk/tools/builtin_tools/load_knowledgebase.py +97 -0
- veadk/tools/builtin_tools/mcp_router.py +29 -0
- veadk/tools/builtin_tools/run_code.py +113 -0
- veadk/tools/builtin_tools/tts.py +253 -0
- veadk/tools/builtin_tools/vesearch.py +49 -0
- veadk/tools/builtin_tools/video_generate.py +363 -0
- veadk/tools/builtin_tools/web_scraper.py +76 -0
- veadk/tools/builtin_tools/web_search.py +83 -0
- veadk/tools/demo_tools.py +58 -0
- veadk/tools/load_knowledgebase_tool.py +149 -0
- veadk/tools/sandbox/__init__.py +13 -0
- veadk/tools/sandbox/browser_sandbox.py +37 -0
- veadk/tools/sandbox/code_sandbox.py +40 -0
- veadk/tools/sandbox/computer_sandbox.py +34 -0
- veadk/tracing/__init__.py +13 -0
- veadk/tracing/base_tracer.py +58 -0
- veadk/tracing/telemetry/__init__.py +13 -0
- veadk/tracing/telemetry/attributes/attributes.py +29 -0
- veadk/tracing/telemetry/attributes/extractors/common_attributes_extractors.py +180 -0
- veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +858 -0
- veadk/tracing/telemetry/attributes/extractors/tool_attributes_extractors.py +152 -0
- veadk/tracing/telemetry/attributes/extractors/types.py +164 -0
- veadk/tracing/telemetry/exporters/__init__.py +13 -0
- veadk/tracing/telemetry/exporters/apmplus_exporter.py +558 -0
- veadk/tracing/telemetry/exporters/base_exporter.py +39 -0
- veadk/tracing/telemetry/exporters/cozeloop_exporter.py +129 -0
- veadk/tracing/telemetry/exporters/inmemory_exporter.py +248 -0
- veadk/tracing/telemetry/exporters/tls_exporter.py +139 -0
- veadk/tracing/telemetry/opentelemetry_tracer.py +320 -0
- veadk/tracing/telemetry/telemetry.py +411 -0
- veadk/types.py +47 -0
- veadk/utils/__init__.py +13 -0
- veadk/utils/audio_manager.py +95 -0
- veadk/utils/auth.py +294 -0
- veadk/utils/logger.py +59 -0
- veadk/utils/mcp_utils.py +44 -0
- veadk/utils/misc.py +184 -0
- veadk/utils/patches.py +101 -0
- veadk/utils/volcengine_sign.py +205 -0
- veadk/version.py +15 -0
- veadk_python-0.2.27.dist-info/METADATA +373 -0
- veadk_python-0.2.27.dist-info/RECORD +218 -0
- veadk_python-0.2.27.dist-info/WHEEL +5 -0
- veadk_python-0.2.27.dist-info/entry_points.txt +2 -0
- veadk_python-0.2.27.dist-info/licenses/LICENSE +201 -0
- veadk_python-0.2.27.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import threading
|
|
17
|
+
from veadk.utils.misc import getenv
|
|
18
|
+
from volcengine.ApiInfo import ApiInfo
|
|
19
|
+
from volcengine.auth.SignerV4 import SignerV4
|
|
20
|
+
from volcengine.base.Service import Service
|
|
21
|
+
from volcengine.Credentials import Credentials
|
|
22
|
+
from volcengine.ServiceInfo import ServiceInfo
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class VikingDBMemoryException(Exception):
    """Error raised for VikingDB memory (knowledgebase) service failures.

    Args:
        code: Service error code (e.g. ``1000028`` for client-side failures).
        request_id: Request identifier returned by the service, or
            ``"missed"`` when no response was received.
        message: Human-readable description of the failure; may be ``None``.
    """

    def __init__(self, code, request_id, message=None):
        self.code = code
        self.request_id = request_id
        self.message = "{}, code:{},request_id:{}".format(
            message, self.code, self.request_id
        )
        # Fix: populate Exception.args so repr(), pickling, and generic
        # exception tooling see the formatted message. The original
        # skipped super().__init__(), leaving e.args empty.
        super().__init__(self.message)

    def __str__(self):
        return self.message
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class VikingDBMemoryClient(Service):
    """Signed HTTP client for the VikingDB memory (knowledgebase) service.

    Wraps the volcengine ``Service`` base class with the memory-collection
    API surface (create/get/drop/update collection, search memory, add
    messages) plus a ``Ping`` connectivity check performed at construction.

    The class is a process-wide singleton: ``__new__`` always returns the
    same instance.
    """

    # Guards first-time creation of the singleton instance.
    _instance_lock = threading.Lock()

    def __new__(cls, *args, **kwargs):
        # Double-checked locking: take the lock only when no instance
        # exists yet, then re-check under the lock before creating it.
        if not hasattr(VikingDBMemoryClient, "_instance"):
            with VikingDBMemoryClient._instance_lock:
                if not hasattr(VikingDBMemoryClient, "_instance"):
                    VikingDBMemoryClient._instance = object.__new__(cls)
        # NOTE(review): __init__ still re-runs on every construction even
        # though the same object is returned — confirm this is intended.
        return VikingDBMemoryClient._instance

    def __init__(
        self,
        host="api-knowledgebase.mlp.cn-beijing.volces.com",
        region="cn-beijing",
        ak="",
        sk="",
        sts_token="",
        scheme="https",
        connection_timeout=30,
        socket_timeout=30,
    ):
        """Configure the service endpoint, credentials and API table.

        Args:
            host: Service host; overridden by ``DATABASE_VIKINGMEM_BASE_URL``
                when that environment variable is set.
            region: Volcengine region used for request signing.
            ak: Access key; applied only when non-empty.
            sk: Secret key; applied only when non-empty.
            sts_token: STS session token; applied only when non-empty.
            scheme: ``"https"`` or ``"http"``; may be overridden by the
                scheme of the environment URL.
            connection_timeout: Connect timeout in seconds.
            socket_timeout: Read timeout in seconds.

        Raises:
            ValueError: If ``DATABASE_VIKINGMEM_BASE_URL`` does not start
                with ``http://`` or ``https://``.
            VikingDBMemoryException: If the initial ``Ping`` request fails.
        """
        # An explicit base URL in the environment wins over the defaults;
        # its scheme prefix is split off into `scheme` and a bare `host`.
        env_host = getenv(
            "DATABASE_VIKINGMEM_BASE_URL", default_value=None, allow_false_values=True
        )
        if env_host:
            if env_host.startswith("http://"):
                host = env_host.replace("http://", "")
                scheme = "http"
            elif env_host.startswith("https://"):
                host = env_host.replace("https://", "")
                scheme = "https"
            else:
                raise ValueError(
                    "DATABASE_VIKINGMEM_BASE_URL must start with http:// or https://"
                )

        self.service_info = VikingDBMemoryClient.get_service_info(
            host, region, scheme, connection_timeout, socket_timeout
        )
        self.api_info = VikingDBMemoryClient.get_api_info()
        super(VikingDBMemoryClient, self).__init__(self.service_info, self.api_info)
        # Credentials are applied only when provided, so an env/instance-role
        # configured base Service keeps working without explicit keys.
        if ak:
            self.set_ak(ak)
        if sk:
            self.set_sk(sk)
        if sts_token:
            self.set_session_token(session_token=sts_token)
        # Eager connectivity check: a failing Ping is surfaced as a
        # configuration error rather than failing later on first use.
        try:
            self.get_body("Ping", {}, json.dumps({}))
        except Exception as e:
            raise VikingDBMemoryException(
                1000028, "missed", "host or region is incorrect: {}".format(str(e))
            ) from None

    def setHeader(self, header):
        """Merge extra HTTP headers into every API definition.

        Rebuilds the API table from scratch, so previously merged headers
        from earlier setHeader calls are discarded.
        """
        api_info = VikingDBMemoryClient.get_api_info()
        for key in api_info:
            for item in header:
                api_info[key].header[item] = header[item]
        self.api_info = api_info

    @staticmethod
    def get_service_info(host, region, scheme, connection_timeout, socket_timeout):
        """Build the ServiceInfo describing endpoint, timeouts and signing.

        The Credentials are created with empty ak/sk and service name
        "air"; actual keys are injected later via set_ak/set_sk.
        """
        service_info = ServiceInfo(
            host,
            {"Host": host},
            Credentials("", "", "air", region),
            connection_timeout,
            socket_timeout,
            scheme=scheme,
        )
        return service_info

    @staticmethod
    def get_api_info():
        """Return the API-name -> ApiInfo table for the memory service.

        All endpoints are JSON POST except ``Ping``, which is a GET.
        """
        api_info = {
            "CreateCollection": ApiInfo(
                "POST",
                "/api/memory/collection/create",
                {},
                {},
                {"Accept": "application/json", "Content-Type": "application/json"},
            ),
            "GetCollection": ApiInfo(
                "POST",
                "/api/memory/collection/info",
                {},
                {},
                {"Accept": "application/json", "Content-Type": "application/json"},
            ),
            "DropCollection": ApiInfo(
                "POST",
                "/api/memory/collection/delete",
                {},
                {},
                {"Accept": "application/json", "Content-Type": "application/json"},
            ),
            "UpdateCollection": ApiInfo(
                "POST",
                "/api/memory/collection/update",
                {},
                {},
                {"Accept": "application/json", "Content-Type": "application/json"},
            ),
            "SearchMemory": ApiInfo(
                "POST",
                "/api/memory/search",
                {},
                {},
                {"Accept": "application/json", "Content-Type": "application/json"},
            ),
            "AddMessages": ApiInfo(
                "POST",
                "/api/memory/messages/add",
                {},
                {},
                {"Accept": "application/json", "Content-Type": "application/json"},
            ),
            "Ping": ApiInfo(
                "GET",
                "/api/memory/ping",
                {},
                {},
                {"Accept": "application/json", "Content-Type": "application/json"},
            ),
        }
        return api_info

    def get_body(self, api, params, body):
        """Send a signed request for *api* and return the response as JSON text.

        Raises:
            Exception: With the UTF-8 encoded response text on any
                non-200 status, or "no such api" for unknown names.
        """
        if api not in self.api_info:
            raise Exception("no such api")
        api_info = self.api_info[api]
        r = self.prepare_request(api_info, params)
        r.headers["Content-Type"] = "application/json"
        r.headers["Traffic-Source"] = "SDK"
        r.body = body

        # Volcengine V4 request signing over the prepared request.
        SignerV4.sign(r, self.service_info.credentials)

        url = r.build()
        # NOTE(review): this always issues an HTTP GET regardless of the
        # method declared in ApiInfo; POST endpoints appear to go through
        # the inherited self.json() instead — confirm against the base
        # Service class.
        resp = self.session.get(
            url,
            headers=r.headers,
            data=r.body,
            timeout=(
                self.service_info.connection_timeout,
                self.service_info.socket_timeout,
            ),
        )
        if resp.status_code == 200:
            return json.dumps(resp.json())
        else:
            raise Exception(resp.text.encode("utf-8"))

    def get_body_exception(self, api, params, body):
        """Like get_body, but re-raise failures as VikingDBMemoryException.

        The error payload is expected to be JSON bytes carrying ``code``,
        ``request_id`` and ``message`` fields.
        """
        try:
            res = self.get_body(api, params, body)
        except Exception as e:
            try:
                res_json = json.loads(e.args[0].decode("utf-8"))
            # NOTE(review): this inner `as e` shadows the original
            # exception, so the formatted message reports the JSON-decode
            # error rather than the original response — confirm intended.
            except Exception as e:
                raise VikingDBMemoryException(
                    1000028, "missed", "json load res error, res:{}".format(str(e))
                ) from None
            code = res_json.get("code", 1000028)
            request_id = res_json.get("request_id", 1000028)
            message = res_json.get("message", None)

            raise VikingDBMemoryException(code, request_id, message)

        # An empty body with a 200 status is treated as a service fault.
        if res == "":
            raise VikingDBMemoryException(
                1000028,
                "missed",
                "empty response due to unknown error, please contact customer service",
            ) from None
        return res

    def get_exception(self, api, params):
        """Like get_body_exception, but for the inherited GET helper.

        Delegates to ``self.get`` (from the volcengine Service base class)
        and converts failures into VikingDBMemoryException.
        """
        try:
            res = self.get(api, params)
        except Exception as e:
            try:
                res_json = json.loads(e.args[0].decode("utf-8"))
            # NOTE(review): same `as e` shadowing as in
            # get_body_exception — the original error is lost here.
            except Exception as e:
                raise VikingDBMemoryException(
                    1000028, "missed", "json load res error, res:{}".format(str(e))
                ) from None
            code = res_json.get("code", 1000028)
            request_id = res_json.get("request_id", 1000028)
            message = res_json.get("message", None)
            raise VikingDBMemoryException(code, request_id, message)
        if res == "":
            raise VikingDBMemoryException(
                1000028,
                "missed",
                "empty response due to unknown error, please contact customer service",
            ) from None
        return res

    # NOTE(review): the mutable list defaults below are shared across
    # calls; they are only read (never mutated) here, so this is a style
    # smell rather than a bug.
    def create_collection(
        self,
        collection_name,
        description="",
        custom_event_type_schemas=[],
        custom_entity_type_schemas=[],
        builtin_event_types=[],
        builtin_entity_types=[],
    ):
        """Create a memory collection and return the parsed JSON response."""
        params = {
            "CollectionName": collection_name,
            "Description": description,
            "CustomEventTypeSchemas": custom_event_type_schemas,
            "CustomEntityTypeSchemas": custom_entity_type_schemas,
            "BuiltinEventTypes": builtin_event_types,
            "BuiltinEntityTypes": builtin_entity_types,
        }
        # self.json (inherited from Service) presumably issues the signed
        # POST declared in the ApiInfo table — defined in the base class.
        res = self.json("CreateCollection", {}, json.dumps(params))
        return json.loads(res)

    def get_collection(self, collection_name):
        """Fetch collection metadata and return the parsed JSON response."""
        params = {"CollectionName": collection_name}
        res = self.json("GetCollection", {}, json.dumps(params))
        return json.loads(res)

    def drop_collection(self, collection_name):
        """Delete a collection and return the parsed JSON response."""
        params = {"CollectionName": collection_name}
        res = self.json("DropCollection", {}, json.dumps(params))
        return json.loads(res)

    def update_collection(
        self,
        collection_name,
        custom_event_type_schemas=[],
        custom_entity_type_schemas=[],
        builtin_event_types=[],
        builtin_entity_types=[],
    ):
        """Update a collection's schemas/types; return the parsed response."""
        params = {
            "CollectionName": collection_name,
            "CustomEventTypeSchemas": custom_event_type_schemas,
            "CustomEntityTypeSchemas": custom_entity_type_schemas,
            "BuiltinEventTypes": builtin_event_types,
            "BuiltinEntityTypes": builtin_entity_types,
        }
        res = self.json("UpdateCollection", {}, json.dumps(params))
        return json.loads(res)

    def search_memory(self, collection_name, query, filter, limit=10):
        """Search memories in a collection; return the parsed response.

        The query is only included when truthy, so an empty query performs
        a filter-only search.
        """
        params = {
            "collection_name": collection_name,
            "limit": limit,
            "filter": filter,
        }
        if query:
            params["query"] = query
        res = self.json("SearchMemory", {}, json.dumps(params))
        return json.loads(res)

    def add_messages(
        self, collection_name, session_id, messages, metadata, entities=None
    ):
        """Append session messages (and optional entities) to a collection."""
        params = {
            "collection_name": collection_name,
            "session_id": session_id,
            "messages": messages,
            "metadata": metadata,
        }
        # `entities` is optional; omit the key entirely when not supplied.
        if entities is not None:
            params["entities"] = entities
        res = self.json("AddMessages", {}, json.dumps(params))
        return json.loads(res)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .knowledgebase import KnowledgeBase
|
|
16
|
+
|
|
17
|
+
__all__ = ["KnowledgeBase"]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
|
|
17
|
+
from pydantic import BaseModel
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BaseKnowledgebaseBackend(ABC, BaseModel):
    """Abstract contract shared by every knowledgebase storage backend.

    A concrete backend must be able to validate its index name and to
    ingest knowledge from a directory, a list of files, or raw text, as
    well as search what it has stored.

    Attributes:
        index (str): Name of the index/collection inside the vector storage.

    Examples:
        Subclass this base to plug in a custom storage engine:

        ```python
        class CustomKnowledgebaseBackend(BaseKnowledgebaseBackend):
            pass
        ```
    """

    index: str

    @abstractmethod
    def precheck_index_naming(self) -> None:
        """Validate the configured index name.

        Implementations raise when the name violates the backend's
        naming rules; returning silently means the name is acceptable.
        """

    @abstractmethod
    def add_from_directory(self, directory: str, *args, **kwargs) -> bool:
        """Ingest every supported file found under the given directory."""

    @abstractmethod
    def add_from_files(self, files: list[str], *args, **kwargs) -> bool:
        """Ingest knowledge from the given list of file paths."""

    @abstractmethod
    def add_from_text(self, text: str | list[str], *args, **kwargs) -> bool:
        """Ingest one raw string, or a list of raw strings."""

    @abstractmethod
    def search(self, *args, **kwargs) -> list:
        """Retrieve stored knowledge relevant to a query."""

    # Candidate extensions, deliberately not part of the contract yet:
    # - `delete`: remove a collection or individual documents
    # - `list_docs`: enumerate the original documents
    # - `list_chunks`: enumerate the embedded document chunks

    # def delete(self, **kwargs) -> bool:
    #     """Delete knowledge from knowledgebase"""

    # def list_docs(self, **kwargs) -> None:
    #     """List original documents in knowledgebase"""

    # def list_chunks(self, **kwargs) -> None:
    #     """List embedded document chunks in knowledgebase"""
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex
|
|
16
|
+
from llama_index.core.schema import BaseNode
|
|
17
|
+
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
|
|
18
|
+
from pydantic import Field
|
|
19
|
+
from typing_extensions import Any, override
|
|
20
|
+
|
|
21
|
+
from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
|
|
22
|
+
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
|
|
23
|
+
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class InMemoryKnowledgeBackend(BaseKnowledgebaseBackend):
    """Knowledgebase backend that keeps embedded chunks in process memory.

    Text is embedded through an OpenAI-compatible endpoint and stored in a
    Llama-index ``VectorStoreIndex`` that lives only as long as this object.

    Attributes:
        embedding_config (EmbeddingModelConfig):
            Embedding configuration (model name and matching dimension)
            used for both indexing and retrieval.
    """

    embedding_config: NormalEmbeddingModelConfig | EmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )

    def model_post_init(self, __context: Any) -> None:
        # Build the embedding client once, then start from an empty index.
        self._embed_model = OpenAILikeEmbedding(
            model_name=self.embedding_config.name,
            api_key=self.embedding_config.api_key,
            api_base=self.embedding_config.api_base,
        )
        self._vector_index = VectorStoreIndex([], embed_model=self._embed_model)

    @override
    def precheck_index_naming(self) -> None:
        # The in-memory store imposes no naming constraints.
        pass

    @override
    def add_from_directory(self, directory: str) -> bool:
        loaded = SimpleDirectoryReader(input_dir=directory).load_data()
        self._vector_index.insert_nodes(self._split_documents(loaded))
        return True

    @override
    def add_from_files(self, files: list[str]) -> bool:
        loaded = SimpleDirectoryReader(input_files=files).load_data()
        self._vector_index.insert_nodes(self._split_documents(loaded))
        return True

    @override
    def add_from_text(self, text: str | list[str]) -> bool:
        # Normalize a single string into a one-element list.
        texts = [text] if isinstance(text, str) else text
        docs = [Document(text=item) for item in texts]
        self._vector_index.insert_nodes(self._split_documents(docs))
        return True

    @override
    def search(self, query: str, top_k: int = 5) -> list[str]:
        retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        return [hit.text for hit in retriever.retrieve(query)]

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Chunk each document with a splitter chosen from its file path."""
        chunks: list[BaseNode] = []
        for doc in documents:
            splitter = get_llama_index_splitter(doc.metadata.get("file_path", ""))
            chunks.extend(splitter.get_nodes_from_documents([doc]))
        return chunks
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
|
|
17
|
+
from llama_index.core import (
|
|
18
|
+
Document,
|
|
19
|
+
SimpleDirectoryReader,
|
|
20
|
+
StorageContext,
|
|
21
|
+
VectorStoreIndex,
|
|
22
|
+
)
|
|
23
|
+
from llama_index.core.schema import BaseNode
|
|
24
|
+
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
|
|
25
|
+
from pydantic import Field
|
|
26
|
+
from typing_extensions import Any, override
|
|
27
|
+
|
|
28
|
+
import veadk.config # noqa E401
|
|
29
|
+
from veadk.configs.database_configs import OpensearchConfig
|
|
30
|
+
from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
|
|
31
|
+
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
|
|
32
|
+
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
|
|
33
|
+
|
|
34
|
+
try:
|
|
35
|
+
from llama_index.vector_stores.opensearch import (
|
|
36
|
+
OpensearchVectorClient,
|
|
37
|
+
OpensearchVectorStore,
|
|
38
|
+
)
|
|
39
|
+
except ImportError:
|
|
40
|
+
raise ImportError(
|
|
41
|
+
"Please install VeADK extensions\npip install veadk-python[extensions]"
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class OpensearchKnowledgeBackend(BaseKnowledgebaseBackend):
    """Opensearch-based backend for knowledgebase.

    Opensearch backend stores embedded text in an OpenSearch index via
    Llama-index's vector-store integration.

    Attributes:
        opensearch_config (OpensearchConfig):
            Opensearch database configurations.
            Mainly contains opensearch host, port, username, password, etc.
        embedding_config (EmbeddingModelConfig | NormalEmbeddingModelConfig):
            Embedding config for text embedding and search.
            Embedding config contains embedding model name and the corresponding dim.

    Examples:
        Init a knowledgebase based on opensearch backend.

        ```python
        knowledgebase = Knowledgebase(backend="opensearch")
        ```

        With more configurations:

        ```python
        knowledgebase = Knowledgebase(
            backend="opensearch",
            index="my_kb_index",
        )
        ```
    """

    opensearch_config: OpensearchConfig = Field(default_factory=OpensearchConfig)
    """Opensearch client configs"""

    embedding_config: EmbeddingModelConfig | NormalEmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )
    """Embedding model configs"""

    def model_post_init(self, __context: Any) -> None:
        # Fail fast on an invalid index name before opening any connection.
        self.precheck_index_naming()

        # NOTE(review): verify_certs=False disables TLS certificate checking
        # and allows man-in-the-middle attacks; consider making this
        # configurable via OpensearchConfig and defaulting to True.
        self._opensearch_client = OpensearchVectorClient(
            endpoint=self.opensearch_config.host,
            port=self.opensearch_config.port,
            http_auth=(
                self.opensearch_config.username,
                self.opensearch_config.password,
            ),
            use_ssl=True,
            verify_certs=False,
            dim=self.embedding_config.dim,
            index=self.index,  # collection name
        )

        self._vector_store = OpensearchVectorStore(client=self._opensearch_client)

        self._storage_context = StorageContext.from_defaults(
            vector_store=self._vector_store
        )

        self._embed_model = OpenAILikeEmbedding(
            model_name=self.embedding_config.name,
            api_key=self.embedding_config.api_key,
            api_base=self.embedding_config.api_base,
        )

        # Empty document list: the index object attaches to the existing
        # OpenSearch collection; content is inserted later via add_* methods.
        self._vector_index = VectorStoreIndex.from_documents(
            documents=[],
            storage_context=self._storage_context,
            embed_model=self._embed_model,
        )

    @override
    def precheck_index_naming(self) -> None:
        """Validate ``self.index`` against OpenSearch index naming rules.

        OpenSearch index names must be lowercase, must not start with ``_``
        or ``-``, may only contain ``a-z``, ``0-9``, ``_``, ``-`` and ``.``,
        and must not be exactly ``.`` or ``..``.

        Raises:
            ValueError: If the index name violates any of these rules.
        """
        # The regex already restricts the name to lowercase characters, so an
        # explicit `islower()` call is deliberately NOT used here: `islower()`
        # returns False for names without any cased character (e.g. "2024.01"),
        # which would wrongly reject valid all-digit index names.
        if not (
            isinstance(self.index, str)
            and self.index not in (".", "..")
            and not self.index.startswith(("_", "-"))
            and re.match(r"^[a-z0-9_\-.]+$", self.index)
        ):
            raise ValueError(
                "The index name does not conform to the naming rules of OpenSearch"
            )

    @override
    def add_from_directory(self, directory: str) -> bool:
        """Load every readable file under *directory* and index its chunks."""
        documents = SimpleDirectoryReader(input_dir=directory).load_data()
        nodes = self._split_documents(documents)
        self._vector_index.insert_nodes(nodes)
        return True

    @override
    def add_from_files(self, files: list[str]) -> bool:
        """Load the given file paths and index their chunks."""
        documents = SimpleDirectoryReader(input_files=files).load_data()
        nodes = self._split_documents(documents)
        self._vector_index.insert_nodes(nodes)
        return True

    @override
    def add_from_text(self, text: str | list[str]) -> bool:
        """Index raw text, given either a single string or a list of strings."""
        if isinstance(text, str):
            documents = [Document(text=text)]
        else:
            documents = [Document(text=t) for t in text]
        nodes = self._split_documents(documents)
        self._vector_index.insert_nodes(nodes)
        return True

    @override
    def search(self, query: str, top_k: int = 5) -> list[str]:
        """Return the text of the *top_k* chunks most similar to *query*."""
        _retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        retrieved_nodes = _retriever.retrieve(query)
        return [node.text for node in retrieved_nodes]

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split documents into chunk nodes using a file-type-aware splitter."""
        nodes: list[BaseNode] = []
        for document in documents:
            # Splitter choice depends on the source file's extension, when known.
            splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
            _nodes = splitter.get_nodes_from_documents([document])
            nodes.extend(_nodes)
        return nodes
|