veadk-python 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of veadk-python might be problematic. Click here for more details.
- veadk/agent.py +11 -18
- veadk/agent_builder.py +94 -0
- veadk/{database/__init__.py → auth/base_auth.py} +7 -2
- veadk/auth/veauth/apmplus_veauth.py +65 -0
- veadk/auth/veauth/ark_veauth.py +77 -0
- veadk/auth/veauth/base_veauth.py +50 -0
- veadk/auth/veauth/opensearch_veauth.py +75 -0
- veadk/auth/veauth/postgresql_veauth.py +75 -0
- veadk/auth/veauth/prompt_pilot_veauth.py +60 -0
- veadk/auth/veauth/vesearch_veauth.py +62 -0
- veadk/cli/cli.py +4 -0
- veadk/cli/cli_deploy.py +3 -2
- veadk/cli/cli_eval.py +160 -0
- veadk/cli/cli_init.py +1 -1
- veadk/cli/cli_pipeline.py +220 -0
- veadk/cli/cli_prompt.py +4 -4
- veadk/cli/cli_web.py +3 -1
- veadk/config.py +45 -81
- veadk/configs/database_configs.py +117 -0
- veadk/configs/model_configs.py +74 -0
- veadk/configs/tool_configs.py +42 -0
- veadk/configs/tracing_configs.py +110 -0
- veadk/consts.py +13 -1
- veadk/evaluation/base_evaluator.py +60 -44
- veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +18 -12
- veadk/evaluation/eval_set_recorder.py +2 -2
- veadk/integrations/ve_code_pipeline/__init__.py +13 -0
- veadk/integrations/ve_code_pipeline/ve_code_pipeline.py +431 -0
- veadk/integrations/ve_cozeloop/__init__.py +13 -0
- veadk/integrations/ve_cozeloop/ve_cozeloop.py +96 -0
- veadk/integrations/ve_cr/ve_cr.py +20 -5
- veadk/integrations/ve_faas/template/cookiecutter.json +1 -1
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/deploy.py +2 -2
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/agent.py +1 -1
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/run.sh +1 -5
- veadk/integrations/ve_faas/ve_faas.py +351 -36
- veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +6 -3
- veadk/integrations/ve_tls/__init__.py +13 -0
- veadk/integrations/ve_tls/utils.py +117 -0
- veadk/integrations/ve_tls/ve_tls.py +208 -0
- veadk/integrations/ve_tos/ve_tos.py +71 -75
- veadk/knowledgebase/backends/__init__.py +13 -0
- veadk/knowledgebase/backends/base_backend.py +59 -0
- veadk/knowledgebase/backends/in_memory_backend.py +82 -0
- veadk/knowledgebase/backends/opensearch_backend.py +136 -0
- veadk/knowledgebase/backends/redis_backend.py +144 -0
- veadk/knowledgebase/backends/utils.py +91 -0
- veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +412 -0
- veadk/knowledgebase/knowledgebase.py +109 -55
- veadk/memory/__init__.py +22 -0
- veadk/memory/long_term_memory.py +120 -51
- veadk/memory/long_term_memory_backends/__init__.py +13 -0
- veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
- veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
- veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
- veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
- veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
- veadk/memory/short_term_memory.py +80 -72
- veadk/memory/short_term_memory_backends/__init__.py +13 -0
- veadk/memory/short_term_memory_backends/base_backend.py +31 -0
- veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
- veadk/memory/short_term_memory_processor.py +9 -4
- veadk/runner.py +204 -247
- veadk/tools/builtin_tools/vesearch.py +2 -2
- veadk/tools/builtin_tools/video_generate.py +27 -20
- veadk/tools/builtin_tools/web_scraper.py +1 -1
- veadk/tools/builtin_tools/web_search.py +7 -7
- veadk/tools/load_knowledgebase_tool.py +1 -1
- veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +20 -2
- veadk/tracing/telemetry/exporters/apmplus_exporter.py +178 -14
- veadk/tracing/telemetry/exporters/cozeloop_exporter.py +6 -9
- veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
- veadk/tracing/telemetry/exporters/tls_exporter.py +6 -10
- veadk/tracing/telemetry/opentelemetry_tracer.py +5 -8
- veadk/tracing/telemetry/telemetry.py +66 -60
- veadk/utils/logger.py +1 -1
- veadk/utils/misc.py +63 -0
- veadk/utils/volcengine_sign.py +6 -2
- veadk/version.py +1 -1
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/METADATA +16 -3
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/RECORD +93 -64
- veadk/database/database_adapter.py +0 -368
- veadk/database/database_factory.py +0 -80
- veadk/database/kv/redis_database.py +0 -159
- veadk/database/local_database.py +0 -61
- veadk/database/relational/mysql_database.py +0 -173
- veadk/database/vector/opensearch_vector_database.py +0 -263
- veadk/database/vector/type.py +0 -50
- veadk/database/viking/viking_database.py +0 -471
- veadk/database/viking/viking_memory_db.py +0 -525
- /veadk/{database/kv → auth}/__init__.py +0 -0
- /veadk/{database/relational → auth/veauth}/__init__.py +0 -0
- /veadk/{database/vector/__init__.py → auth/veauth/cozeloop_veauth.py} +0 -0
- /veadk/{database/viking → configs}/__init__.py +0 -0
- /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/__init__.py +0 -0
- /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/agent.py +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/WHEEL +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/entry_points.txt +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/top_level.txt +0 -0
veadk/memory/long_term_memory.py
CHANGED
|
@@ -13,8 +13,9 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
# adapted from Google ADK memory service adk-python/src/google/adk/memory/vertex_ai_memory_bank_service.py at 0a9e67dbca67789247e882d16b139dbdc76a329a · google/adk-python
|
|
16
|
+
|
|
16
17
|
import json
|
|
17
|
-
from typing import Literal
|
|
18
|
+
from typing import Any, Literal
|
|
18
19
|
|
|
19
20
|
from google.adk.events.event import Event
|
|
20
21
|
from google.adk.memory.base_memory_service import (
|
|
@@ -24,47 +25,98 @@ from google.adk.memory.base_memory_service import (
|
|
|
24
25
|
from google.adk.memory.memory_entry import MemoryEntry
|
|
25
26
|
from google.adk.sessions import Session
|
|
26
27
|
from google.genai import types
|
|
27
|
-
from
|
|
28
|
+
from pydantic import BaseModel, Field
|
|
29
|
+
from typing_extensions import Union, override
|
|
28
30
|
|
|
29
|
-
from veadk.
|
|
30
|
-
|
|
31
|
+
from veadk.memory.long_term_memory_backends.base_backend import (
|
|
32
|
+
BaseLongTermMemoryBackend,
|
|
33
|
+
)
|
|
31
34
|
from veadk.utils.logger import get_logger
|
|
32
35
|
|
|
33
36
|
logger = get_logger(__name__)
|
|
34
37
|
|
|
35
38
|
|
|
39
|
+
def _get_backend_cls(backend: str) -> type[BaseLongTermMemoryBackend]:
|
|
40
|
+
match backend:
|
|
41
|
+
case "local":
|
|
42
|
+
from veadk.memory.long_term_memory_backends.in_memory_backend import (
|
|
43
|
+
InMemoryLTMBackend,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
return InMemoryLTMBackend
|
|
47
|
+
case "opensearch":
|
|
48
|
+
from veadk.memory.long_term_memory_backends.opensearch_backend import (
|
|
49
|
+
OpensearchLTMBackend,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
return OpensearchLTMBackend
|
|
53
|
+
case "viking":
|
|
54
|
+
from veadk.memory.long_term_memory_backends.vikingdb_memory_backend import (
|
|
55
|
+
VikingDBLTMBackend,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
return VikingDBLTMBackend
|
|
59
|
+
case "redis":
|
|
60
|
+
from veadk.memory.long_term_memory_backends.redis_backend import (
|
|
61
|
+
RedisLTMBackend,
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
return RedisLTMBackend
|
|
65
|
+
|
|
66
|
+
raise ValueError(f"Unsupported long term memory backend: {backend}")
|
|
67
|
+
|
|
68
|
+
|
|
36
69
|
def build_long_term_memory_index(app_name: str, user_id: str):
    """Return the long term memory index name for an app/user pair.

    The name is ``app_name`` and ``user_id`` joined by a single underscore.
    """
    return "{}_{}".format(app_name, user_id)
|
|
38
71
|
|
|
39
72
|
|
|
40
|
-
class LongTermMemory(BaseMemoryService):
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
top_k: int = 5,
|
|
47
|
-
):
|
|
48
|
-
if backend == "viking":
|
|
49
|
-
logger.warning(
|
|
50
|
-
"`viking` backend is deprecated, switching to `viking_mem` backend."
|
|
51
|
-
)
|
|
52
|
-
backend = "viking_mem"
|
|
53
|
-
self.top_k = top_k
|
|
54
|
-
self.backend = backend
|
|
73
|
+
class LongTermMemory(BaseMemoryService, BaseModel):
|
|
74
|
+
backend: Union[
|
|
75
|
+
Literal["local", "opensearch", "redis", "viking", "viking_mem"],
|
|
76
|
+
BaseLongTermMemoryBackend,
|
|
77
|
+
] = "opensearch"
|
|
78
|
+
"""Long term memory backend type"""
|
|
55
79
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
)
|
|
80
|
+
backend_config: dict = Field(default_factory=dict)
|
|
81
|
+
"""Long term memory backend configuration"""
|
|
59
82
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
)
|
|
63
|
-
self.adapter = get_long_term_memory_database_adapter(self.db_client)
|
|
83
|
+
top_k: int = 5
|
|
84
|
+
"""Number of top similar documents to retrieve during search."""
|
|
64
85
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
86
|
+
app_name: str = ""
|
|
87
|
+
|
|
88
|
+
user_id: str = ""
|
|
89
|
+
|
|
90
|
+
def model_post_init(self, __context: Any) -> None:
    """Resolve ``backend`` into a concrete backend after model validation.

    Resolution order:

    1. ``backend`` is already a ``BaseLongTermMemoryBackend`` instance: it is
       used as is.
    2. ``backend_config`` is non-empty: the class named by ``backend`` is
       built from that config; ``app_name`` / ``user_id`` are ignored.
    3. Both ``app_name`` and ``user_id`` are set: the class named by
       ``backend`` is built with an index derived from them.
    4. Otherwise no backend is created here and a warning is logged.

    Args:
        __context: Pydantic post-init context; not used.

    Raises:
        ValueError: If ``backend`` is a string that names no supported backend.
    """
    self._backend = None
    # `add_session_to_memory` and `search_memory` read `self._index`, so the
    # attribute has to exist on every path, not only the app_name/user_id one.
    self._index = None

    # Once the user provides a backend instance, use it directly
    if isinstance(self.backend, BaseLongTermMemoryBackend):
        self._backend = self.backend
        self._index = self._backend.index
        logger.info(
            f"Initialized long term memory with provided backend instance {self._backend.__class__.__name__}"
        )
        return

    if self.backend_config:
        logger.warning(
            f"Initialized long term memory backend {self.backend} with config. We will ignore `app_name` and `user_id` if provided."
        )
        self._backend = _get_backend_cls(self.backend)(**self.backend_config)
        self._index = self._backend.index
        return

    if self.app_name and self.user_id:
        self._index = build_long_term_memory_index(
            app_name=self.app_name, user_id=self.user_id
        )
        logger.info(f"Long term memory index set to {self._index}.")
        # `backend_config` is always empty here (the non-empty case returned
        # above), so only the index is passed.
        self._backend = _get_backend_cls(self.backend)(index=self._index)
    else:
        logger.warning(
            "Neither `backend_instance`, `backend_config`, nor (`app_name`/`user_id`) is provided, the long term memory storage will initialize when adding a session."
        )
|
|
68
120
|
|
|
69
121
|
def _filter_and_convert_events(self, events: list[Event]) -> list[str]:
|
|
70
122
|
final_events = []
|
|
@@ -92,40 +144,57 @@ class LongTermMemory(BaseMemoryService):
|
|
|
92
144
|
self,
|
|
93
145
|
session: Session,
|
|
94
146
|
):
|
|
147
|
+
app_name = session.app_name
|
|
148
|
+
user_id = session.user_id
|
|
149
|
+
|
|
150
|
+
if self._index != build_long_term_memory_index(app_name, user_id):
|
|
151
|
+
logger.warning(
|
|
152
|
+
f"The `app_name` or `user_id` is different from the initialized one, skip add session to memory. Initialized index: {self._index}, current built index: {build_long_term_memory_index(app_name, user_id)}"
|
|
153
|
+
)
|
|
154
|
+
return
|
|
155
|
+
|
|
156
|
+
if not self._backend and isinstance(self.backend, str):
|
|
157
|
+
self._index = build_long_term_memory_index(app_name, user_id)
|
|
158
|
+
self._backend = _get_backend_cls(self.backend)(
|
|
159
|
+
index=self._index, **self.backend_config if self.backend_config else {}
|
|
160
|
+
)
|
|
161
|
+
logger.info(
|
|
162
|
+
f"Initialize long term memory backend now, index is {self._index}"
|
|
163
|
+
)
|
|
164
|
+
|
|
95
165
|
event_strings = self._filter_and_convert_events(session.events)
|
|
96
|
-
index = build_long_term_memory_index(session.app_name, session.user_id)
|
|
97
166
|
|
|
98
167
|
logger.info(
|
|
99
|
-
f"Adding {len(event_strings)} events to long term memory: index={
|
|
168
|
+
f"Adding {len(event_strings)} events to long term memory: index={self._index}"
|
|
100
169
|
)
|
|
101
170
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
self.adapter.add(data=event_strings, index=index, user_id=session.user_id)
|
|
105
|
-
else:
|
|
106
|
-
self.adapter.add(data=event_strings, index=index)
|
|
171
|
+
if self._backend:
|
|
172
|
+
self._backend.save_memory(event_strings=event_strings, user_id=user_id)
|
|
107
173
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
174
|
+
logger.info(
|
|
175
|
+
f"Added {len(event_strings)} events to long term memory: index={self._index}"
|
|
176
|
+
)
|
|
177
|
+
else:
|
|
178
|
+
logger.error(
|
|
179
|
+
"Long term memory backend initialize failed, cannot add session to memory."
|
|
180
|
+
)
|
|
111
181
|
|
|
112
182
|
@override
|
|
113
183
|
async def search_memory(self, *, app_name: str, user_id: str, query: str):
|
|
114
|
-
|
|
184
|
+
# prevent model invoke `load_memory` before add session to this memory
|
|
185
|
+
if not self._backend:
|
|
186
|
+
logger.error(
|
|
187
|
+
"Long term memory backend is not initialized, cannot search memory."
|
|
188
|
+
)
|
|
189
|
+
return SearchMemoryResponse(memories=[])
|
|
115
190
|
|
|
116
191
|
logger.info(
|
|
117
|
-
f"Searching long term memory: query={query} index={
|
|
192
|
+
f"Searching long term memory: query={query} index={self._index} top_k={self.top_k}"
|
|
118
193
|
)
|
|
119
194
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
query=query, index=index, top_k=self.top_k, user_id=user_id
|
|
124
|
-
)
|
|
125
|
-
else:
|
|
126
|
-
memory_chunks = self.adapter.query(
|
|
127
|
-
query=query, index=index, top_k=self.top_k
|
|
128
|
-
)
|
|
195
|
+
memory_chunks = self._backend.search_memory(
|
|
196
|
+
query=query, top_k=self.top_k, user_id=user_id
|
|
197
|
+
)
|
|
129
198
|
|
|
130
199
|
memory_events = []
|
|
131
200
|
for memory in memory_chunks:
|
|
@@ -153,6 +222,6 @@ class LongTermMemory(BaseMemoryService):
|
|
|
153
222
|
)
|
|
154
223
|
|
|
155
224
|
logger.info(
|
|
156
|
-
f"Return {len(memory_events)} memory events for query: {query} index={
|
|
225
|
+
f"Return {len(memory_events)} memory events for query: {query} index={self._index}"
|
|
157
226
|
)
|
|
158
227
|
return SearchMemoryResponse(memories=memory_events)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -13,33 +13,21 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
|
-
from typing import Any
|
|
17
16
|
|
|
17
|
+
from pydantic import BaseModel
|
|
18
18
|
|
|
19
|
-
class DatabaseType:
|
|
20
|
-
LOCAL = "local"
|
|
21
|
-
RELATIONAL = "relational"
|
|
22
|
-
VECTOR = "vector"
|
|
23
|
-
KV = "kv"
|
|
24
19
|
|
|
20
|
+
class BaseLongTermMemoryBackend(ABC, BaseModel):
|
|
21
|
+
index: str
|
|
25
22
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
Args:
|
|
30
|
-
type: type of the database
|
|
31
|
-
|
|
32
|
-
Note:
|
|
33
|
-
No `update` function support currently.
|
|
34
|
-
"""
|
|
35
|
-
|
|
36
|
-
def __init__(self):
|
|
37
|
-
pass
|
|
38
|
-
|
|
39
|
-
def add(self, texts: list[Any], **kwargs: Any): ...
|
|
23
|
+
@abstractmethod
|
|
24
|
+
def precheck_index_naming(self):
|
|
25
|
+
"""Check the index name is valid or not"""
|
|
40
26
|
|
|
41
27
|
@abstractmethod
|
|
42
|
-
def
|
|
28
|
+
def save_memory(self, event_strings: list[str], **kwargs) -> bool:
|
|
29
|
+
"""Save memory to long term memory backend"""
|
|
43
30
|
|
|
44
31
|
@abstractmethod
|
|
45
|
-
def
|
|
32
|
+
def search_memory(self, query: str, top_k: int, **kwargs) -> list[str]:
|
|
33
|
+
"""Retrieve memory from long term memory backend"""
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from llama_index.core import Document, VectorStoreIndex
|
|
16
|
+
from llama_index.core.schema import BaseNode
|
|
17
|
+
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
|
|
18
|
+
from pydantic import Field
|
|
19
|
+
from typing_extensions import Any, override
|
|
20
|
+
|
|
21
|
+
from veadk.configs.model_configs import EmbeddingModelConfig
|
|
22
|
+
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
|
|
23
|
+
from veadk.memory.long_term_memory_backends.base_backend import (
|
|
24
|
+
BaseLongTermMemoryBackend,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class InMemoryLTMBackend(BaseLongTermMemoryBackend):
    """Long term memory backend backed by a llama-index ``VectorStoreIndex``.

    The index is created from an empty node list with no explicit storage
    context, and event strings are embedded with an OpenAI-compatible
    embedding model configured by ``embedding_config``.
    """

    embedding_config: EmbeddingModelConfig = Field(default_factory=EmbeddingModelConfig)
    """Embedding model configs"""

    def precheck_index_naming(self):
        """Accept any index name; this backend performs no naming check."""
        return None

    def model_post_init(self, __context: Any) -> None:
        """Create the embedding model and the empty vector index.

        Args:
            __context: Pydantic post-init context; not used.
        """
        embedding = self.embedding_config
        self._embed_model = OpenAILikeEmbedding(
            model_name=embedding.name,
            api_key=embedding.api_key,
            api_base=embedding.api_base,
        )
        self._vector_index = VectorStoreIndex([], embed_model=self._embed_model)

    @override
    def save_memory(self, event_strings: list[str], **kwargs) -> bool:
        """Split each event string into nodes and insert them into the index.

        Args:
            event_strings: Event texts to store, processed one at a time.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Always ``True``.
        """
        for text in event_strings:
            self._vector_index.insert_nodes(
                self._split_documents([Document(text=text)])
            )
        return True

    @override
    def search_memory(self, query: str, top_k: int, **kwargs) -> list[str]:
        """Return the text of the nodes retrieved for ``query``.

        Args:
            query: Query text handed to the retriever.
            top_k: Passed to the retriever as ``similarity_top_k``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The ``text`` of each retrieved node, in retrieval order.
        """
        hits = self._vector_index.as_retriever(similarity_top_k=top_k).retrieve(
            query
        )
        texts = []
        for hit in hits:
            texts.append(hit.text)
        return texts

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split documents into nodes.

        The splitter for each document is chosen from its ``file_path``
        metadata entry (empty string when absent).
        """
        return [
            node
            for document in documents
            for node in get_llama_index_splitter(
                document.metadata.get("file_path", "")
            ).get_nodes_from_documents([document])
        ]
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
|
|
17
|
+
from llama_index.core import (
|
|
18
|
+
Document,
|
|
19
|
+
StorageContext,
|
|
20
|
+
VectorStoreIndex,
|
|
21
|
+
)
|
|
22
|
+
from llama_index.core.schema import BaseNode
|
|
23
|
+
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
|
|
24
|
+
from pydantic import Field
|
|
25
|
+
from typing_extensions import Any, override
|
|
26
|
+
|
|
27
|
+
import veadk.config # noqa E401
|
|
28
|
+
from veadk.configs.database_configs import OpensearchConfig
|
|
29
|
+
from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
|
|
30
|
+
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
|
|
31
|
+
from veadk.memory.long_term_memory_backends.base_backend import (
|
|
32
|
+
BaseLongTermMemoryBackend,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
try:
|
|
36
|
+
from llama_index.vector_stores.opensearch import (
|
|
37
|
+
OpensearchVectorClient,
|
|
38
|
+
OpensearchVectorStore,
|
|
39
|
+
)
|
|
40
|
+
except ImportError:
|
|
41
|
+
raise ImportError(
|
|
42
|
+
"Please install VeADK extensions\npip install veadk-python[extensions]"
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class OpensearchLTMBackend(BaseLongTermMemoryBackend):
    """Long term memory backend that stores event text in an OpenSearch vector index.

    Connection settings come from ``opensearch_config``; texts are embedded
    with an OpenAI-compatible embedding model configured by
    ``embedding_config``. ``index`` is used as the OpenSearch index name.
    """

    opensearch_config: OpensearchConfig = Field(default_factory=OpensearchConfig)
    """Opensearch client configs"""

    embedding_config: EmbeddingModelConfig | NormalEmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )
    """Embedding model configs"""

    def precheck_index_naming(self):
        """Validate ``self.index`` against the OpenSearch index naming rules.

        The name must be a non-empty string made only of lowercase letters,
        digits, ``_``, ``-`` and ``.``, and must not start with ``_`` or ``-``.

        Raises:
            ValueError: If the index name breaks any of these rules.
        """
        name = self.index
        is_valid = (
            isinstance(name, str)
            and not name.startswith(("_", "-"))
            # `fullmatch` (instead of `match` with `$`) also rejects a trailing
            # newline. The character class already enforces lowercase; an
            # extra `str.islower()` test would wrongly reject names that
            # contain no letters at all, such as "123_456".
            and re.fullmatch(r"[a-z0-9_\-.]+", name) is not None
        )
        if not is_valid:
            raise ValueError(
                "The index name does not conform to the naming rules of OpenSearch"
            )

    def model_post_init(self, __context: Any) -> None:
        """Create the OpenSearch client, vector store, embedding model and index.

        Args:
            __context: Pydantic post-init context; not used.
        """
        self._opensearch_client = OpensearchVectorClient(
            endpoint=self.opensearch_config.host,
            port=self.opensearch_config.port,
            http_auth=(
                self.opensearch_config.username,
                self.opensearch_config.password,
            ),
            use_ssl=True,
            # NOTE(review): TLS certificate verification is disabled here;
            # confirm this is intended for the target deployments.
            verify_certs=False,
            dim=self.embedding_config.dim,
            index=self.index,  # collection name
        )

        self._vector_store = OpensearchVectorStore(client=self._opensearch_client)

        self._storage_context = StorageContext.from_defaults(
            vector_store=self._vector_store
        )

        self._embed_model = OpenAILikeEmbedding(
            model_name=self.embedding_config.name,
            api_key=self.embedding_config.api_key,
            api_base=self.embedding_config.api_base,
        )

        self._vector_index = VectorStoreIndex.from_documents(
            documents=[],
            storage_context=self._storage_context,
            embed_model=self._embed_model,
        )
        # Default retriever; `search_memory` builds its own retriever per call
        # so that it can honour `top_k`.
        self._retriever = self._vector_index.as_retriever()

    @override
    def save_memory(self, event_strings: list[str], **kwargs) -> bool:
        """Split each event string into nodes and insert them into the index.

        Args:
            event_strings: Event texts to store, processed one at a time.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Always ``True``.
        """
        for event_string in event_strings:
            document = Document(text=event_string)
            nodes = self._split_documents([document])
            self._vector_index.insert_nodes(nodes)
        return True

    @override
    def search_memory(self, query: str, top_k: int, **kwargs) -> list[str]:
        """Return the text of the nodes retrieved for ``query``.

        Args:
            query: Query text handed to the retriever.
            top_k: Passed to the retriever as ``similarity_top_k``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The ``text`` of each retrieved node, in retrieval order.
        """
        _retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        retrieved_nodes = _retriever.retrieve(query)
        return [node.text for node in retrieved_nodes]

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split documents into nodes.

        The splitter for each document is chosen from its ``file_path``
        metadata entry (empty string when absent).
        """
        nodes = []
        for document in documents:
            splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
            _nodes = splitter.get_nodes_from_documents([document])
            nodes.extend(_nodes)
        return nodes
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from llama_index.core import (
|
|
16
|
+
Document,
|
|
17
|
+
StorageContext,
|
|
18
|
+
VectorStoreIndex,
|
|
19
|
+
)
|
|
20
|
+
from llama_index.core.schema import BaseNode
|
|
21
|
+
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
|
|
22
|
+
from pydantic import Field
|
|
23
|
+
from typing_extensions import Any, override
|
|
24
|
+
|
|
25
|
+
import veadk.config # noqa E401
|
|
26
|
+
from veadk.configs.database_configs import RedisConfig
|
|
27
|
+
from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
|
|
28
|
+
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
|
|
29
|
+
from veadk.memory.long_term_memory_backends.base_backend import (
|
|
30
|
+
BaseLongTermMemoryBackend,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
try:
|
|
34
|
+
from llama_index.vector_stores.redis import RedisVectorStore
|
|
35
|
+
from llama_index.vector_stores.redis.schema import (
|
|
36
|
+
RedisIndexInfo,
|
|
37
|
+
RedisVectorStoreSchema,
|
|
38
|
+
)
|
|
39
|
+
from redis import Redis
|
|
40
|
+
from redisvl.schema.fields import BaseVectorFieldAttributes
|
|
41
|
+
except ImportError:
|
|
42
|
+
raise ImportError(
|
|
43
|
+
"Please install VeADK extensions\npip install veadk-python[extensions]"
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class RedisLTMBackend(BaseLongTermMemoryBackend):
    """Long term memory backend that stores event text in a Redis vector store.

    Connection settings come from ``redis_config``; texts are embedded with an
    OpenAI-compatible embedding model configured by ``embedding_config``.
    ``index`` is used as the Redis index name.
    """

    redis_config: RedisConfig = Field(default_factory=RedisConfig)
    """Redis client configs"""

    embedding_config: EmbeddingModelConfig | NormalEmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )
    """Embedding model configs"""

    def precheck_index_naming(self):
        """Accept any index name; this backend performs no naming check."""
        return None

    def model_post_init(self, __context: Any) -> None:
        """Create the Redis client, embedding model, schema, store and index.

        Args:
            __context: Pydantic post-init context; not used.
        """
        # TODO: switch to `Redis.from_url(url=...)` once the AK/SK -> STS
        # token exchange is ready.
        redis_settings = self.redis_config
        self._redis_client = Redis(
            host=redis_settings.host,
            port=redis_settings.port,
            db=redis_settings.db,
            password=redis_settings.password,
        )

        embedding = self.embedding_config
        self._embed_model = OpenAILikeEmbedding(
            model_name=embedding.name,
            api_key=embedding.api_key,
            api_base=embedding.api_base,
        )

        self._schema = RedisVectorStoreSchema(index=RedisIndexInfo(name=self.index))

        # Make the schema's vector field use the embedding model's dimension,
        # when the schema exposes such a field with vector attributes.
        if "vector" in self._schema.fields:
            vector_field = self._schema.fields["vector"]
            has_vector_attrs = (
                vector_field
                and vector_field.attrs
                and isinstance(vector_field.attrs, BaseVectorFieldAttributes)
            )
            if has_vector_attrs:
                vector_field.attrs.dims = self.embedding_config.dim

        # NOTE(review): `overwrite=True` is passed on every initialization;
        # confirm whether this discards an existing index with the same name.
        self._vector_store = RedisVectorStore(
            schema=self._schema,
            redis_client=self._redis_client,
            overwrite=True,
            collection_name=self.index,
        )

        self._storage_context = StorageContext.from_defaults(
            vector_store=self._vector_store
        )

        self._vector_index = VectorStoreIndex.from_documents(
            documents=[],
            storage_context=self._storage_context,
            embed_model=self._embed_model,
        )

    @override
    def save_memory(self, event_strings: list[str], **kwargs) -> bool:
        """Split each event string into nodes and insert them into the index.

        Args:
            event_strings: Event texts to store, processed one at a time.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Always ``True``.
        """
        for text in event_strings:
            self._vector_index.insert_nodes(
                self._split_documents([Document(text=text)])
            )
        return True

    @override
    def search_memory(self, query: str, top_k: int, **kwargs) -> list[str]:
        """Return the text of the nodes retrieved for ``query``.

        Args:
            query: Query text handed to the retriever.
            top_k: Passed to the retriever as ``similarity_top_k``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The ``text`` of each retrieved node, in retrieval order.
        """
        hits = self._vector_index.as_retriever(similarity_top_k=top_k).retrieve(
            query
        )
        texts = []
        for hit in hits:
            texts.append(hit.text)
        return texts

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split documents into nodes.

        The splitter for each document is chosen from its ``file_path``
        metadata entry (empty string when absent).
        """
        return [
            node
            for document in documents
            for node in get_llama_index_splitter(
                document.metadata.get("file_path", "")
            ).get_nodes_from_documents([document])
        ]
|