veadk-python 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of veadk-python might be problematic.
- veadk/agent.py +3 -2
- veadk/auth/veauth/opensearch_veauth.py +75 -0
- veadk/auth/veauth/postgresql_veauth.py +75 -0
- veadk/cli/cli.py +3 -1
- veadk/cli/cli_eval.py +160 -0
- veadk/cli/cli_prompt.py +9 -2
- veadk/cli/cli_web.py +6 -1
- veadk/configs/database_configs.py +43 -0
- veadk/configs/model_configs.py +32 -0
- veadk/consts.py +11 -4
- veadk/evaluation/adk_evaluator/adk_evaluator.py +5 -2
- veadk/evaluation/base_evaluator.py +95 -68
- veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +23 -15
- veadk/evaluation/eval_set_recorder.py +2 -2
- veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +9 -3
- veadk/integrations/ve_tls/utils.py +1 -2
- veadk/integrations/ve_tls/ve_tls.py +9 -5
- veadk/integrations/ve_tos/ve_tos.py +542 -68
- veadk/knowledgebase/backends/base_backend.py +59 -0
- veadk/knowledgebase/backends/in_memory_backend.py +82 -0
- veadk/knowledgebase/backends/opensearch_backend.py +136 -0
- veadk/knowledgebase/backends/redis_backend.py +144 -0
- veadk/knowledgebase/backends/utils.py +91 -0
- veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +524 -0
- veadk/{database/__init__.py → knowledgebase/entry.py} +10 -2
- veadk/knowledgebase/knowledgebase.py +120 -139
- veadk/memory/__init__.py +22 -0
- veadk/memory/long_term_memory.py +124 -41
- veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
- veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
- veadk/memory/long_term_memory_backends/mem0_backend.py +129 -0
- veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
- veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
- veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
- veadk/memory/short_term_memory.py +80 -72
- veadk/memory/short_term_memory_backends/base_backend.py +31 -0
- veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
- veadk/runner.py +12 -19
- veadk/tools/builtin_tools/generate_image.py +355 -0
- veadk/tools/builtin_tools/image_edit.py +56 -16
- veadk/tools/builtin_tools/image_generate.py +51 -15
- veadk/tools/builtin_tools/video_generate.py +41 -41
- veadk/tools/builtin_tools/web_scraper.py +1 -1
- veadk/tools/builtin_tools/web_search.py +7 -7
- veadk/tools/load_knowledgebase_tool.py +2 -8
- veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +21 -3
- veadk/tracing/telemetry/exporters/apmplus_exporter.py +24 -6
- veadk/tracing/telemetry/exporters/cozeloop_exporter.py +2 -0
- veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
- veadk/tracing/telemetry/exporters/tls_exporter.py +2 -0
- veadk/tracing/telemetry/opentelemetry_tracer.py +13 -10
- veadk/tracing/telemetry/telemetry.py +66 -63
- veadk/utils/misc.py +15 -0
- veadk/version.py +1 -1
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/METADATA +28 -5
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/RECORD +65 -56
- veadk/database/database_adapter.py +0 -533
- veadk/database/database_factory.py +0 -80
- veadk/database/kv/redis_database.py +0 -159
- veadk/database/local_database.py +0 -62
- veadk/database/relational/mysql_database.py +0 -173
- veadk/database/vector/opensearch_vector_database.py +0 -263
- veadk/database/vector/type.py +0 -50
- veadk/database/viking/__init__.py +0 -13
- veadk/database/viking/viking_database.py +0 -638
- veadk/database/viking/viking_memory_db.py +0 -525
- /veadk/{database/kv → knowledgebase/backends}/__init__.py +0 -0
- /veadk/{database/relational → memory/long_term_memory_backends}/__init__.py +0 -0
- /veadk/{database/vector → memory/short_term_memory_backends}/__init__.py +0 -0
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/WHEEL +0 -0
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/entry_points.txt +0 -0
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/top_level.txt +0 -0
veadk/knowledgebase/knowledgebase.py
CHANGED
@@ -11,168 +11,149 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import io
-import os.path
-from typing import Any, BinaryIO, Literal, TextIO
 
-from
+from typing import Any, Callable, Literal
 
-from
-from
-
+from pydantic import BaseModel, Field
+from typing_extensions import Union
+
+from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
+from veadk.knowledgebase.entry import KnowledgebaseEntry
 from veadk.utils.logger import get_logger
 
 logger = get_logger(__name__)
 
 
+def _get_backend_cls(backend: str) -> type[BaseKnowledgebaseBackend]:
+    match backend:
+        case "local":
+            from veadk.knowledgebase.backends.in_memory_backend import (
+                InMemoryKnowledgeBackend,
+            )
+
+            return InMemoryKnowledgeBackend
+        case "opensearch":
+            from veadk.knowledgebase.backends.opensearch_backend import (
+                OpensearchKnowledgeBackend,
+            )
+
+            return OpensearchKnowledgeBackend
+        case "viking":
+            from veadk.knowledgebase.backends.vikingdb_knowledge_backend import (
+                VikingDBKnowledgeBackend,
+            )
+
+            return VikingDBKnowledgeBackend
+        case "redis":
+            from veadk.knowledgebase.backends.redis_backend import (
+                RedisKnowledgeBackend,
+            )
+
+            return RedisKnowledgeBackend
+
+    raise ValueError(f"Unsupported knowledgebase backend: {backend}")
+
+
 def build_knowledgebase_index(app_name: str):
     return f"veadk_kb_{app_name}"
 
 
 class KnowledgeBase(BaseModel):
-    backend:
+    backend: Union[
+        Literal["local", "opensearch", "viking", "redis"], BaseKnowledgebaseBackend
+    ] = "local"
+    """Knowledgebase backend type. Supported backends are:
+    - local: In-memory knowledgebase, data will be lost when the program exits.
+    - opensearch: OpenSearch knowledgebase, requires an OpenSearch cluster.
+    - viking: Volcengine VikingDB knowledgebase, requires VikingDB service.
+    - redis: Redis knowledgebase, requires Redis with vector search capability.
+    Default is `local`."""
+
+    backend_config: dict = Field(default_factory=dict)
+    """Configuration for the backend"""
+
     top_k: int = 10
-
+    """Number of top similar documents to retrieve during search.
 
-
-        logger.info(
-            f"Initializing knowledgebase: backend={self.backend} top_k={self.top_k}"
-        )
+    Default is 10."""
 
-
-            backend=self.backend, config=self.db_config
-        )
-        self._adapter = get_knowledgebase_database_adapter(self._db_client)
+    app_name: str = ""
 
-
-
-        )
+    index: str = ""
+    """The name of the knowledgebase index. If not provided, it will be generated based on the `app_name`."""
 
-    def
-        self,
-
-
-
-
-
-
-
-
-            - str: A single file path. (viking only)
-            - list[str]: A list of file paths.
-            - TextIO: A file object (TextIO). (viking only) file descriptor
-            - BinaryIO: A file object (BinaryIO). (viking only) file descriptor
-            - bytes: Binary data. (viking only) binary data (f.read())
-            app_name: index name
-            **kwargs: Additional keyword arguments.
-            - file_name (str | list[str]): The file name or a list of file names (including suffix). (viking only)
-        """
-        if self.backend != "viking" and not (
-            isinstance(data, str) or isinstance(data, list)
-        ):
+    def model_post_init(self, __context: Any) -> None:
+        if isinstance(self.backend, BaseKnowledgebaseBackend):
+            self._backend = self.backend
+            logger.info(
+                f"Initialized knowledgebase with provided backend instance {self._backend.__class__.__name__}"
+            )
+            return
+
+        # must provide at least one of them
+        if not self.app_name and not self.index:
             raise ValueError(
-                "
+                "Either `app_name` or `index` must be provided one of them."
             )
 
-
-
-
-
-        # Case 1: Handling file paths or lists of file paths (str)
-        if isinstance(data, str) and os.path.isfile(data):
-            # Get the file name (including the suffix)
-            if "file_name" not in kwargs or not kwargs["file_name"]:
-                kwargs["file_name"] = os.path.basename(data)
-            return self._adapter.add(data=data, index=index, **kwargs)
-        # Case 2: Handling when list[str] is a full path (list[str])
-        if isinstance(data, list):
-            if all(isinstance(item, str) for item in data):
-                all_paths = all(os.path.isfile(item) for item in data)
-                all_not_paths = all(not os.path.isfile(item) for item in data)
-                if all_paths:
-                    if "file_name" not in kwargs or not kwargs["file_name"]:
-                        kwargs["file_name"] = [
-                            os.path.basename(item) for item in data
-                        ]
-                    return self._adapter.add(data=data, index=index, **kwargs)
-                elif (
-                    not all_not_paths
-                ):  # Prevent the occurrence of non-existent paths
-                    # There is a mixture of paths and non-paths
-                    raise ValueError(
-                        "Mixed file paths and content strings in list are not allowed"
-                    )
-        # Case 3: Handling strings or string arrays (content) (str or list[str])
-        if isinstance(data, str) or (
-            isinstance(data, list) and all(isinstance(item, str) for item in data)
-        ):
-            if "file_name" not in kwargs or not kwargs["file_name"]:
-                if isinstance(data, str):
-                    kwargs["file_name"] = f"{formatted_timestamp()}.txt"
-                else:  # list[str] without file_names
-                    prefix_file_name = formatted_timestamp()
-                    kwargs["file_name"] = [
-                        f"{prefix_file_name}_{i}.txt" for i in range(len(data))
-                    ]
-            return self._adapter.add(data=data, index=index, **kwargs)
-
-        # Case 4: Handling binary data (bytes)
-        if isinstance(data, bytes):
-            # user must give file_name
-            if "file_name" not in kwargs:
-                raise ValueError("file_name must be provided for binary data")
-            return self._adapter.add(data=data, index=index, **kwargs)
-
-        # Case 5: Handling file objects TextIO or BinaryIO
-        if isinstance(data, (io.TextIOWrapper, io.BufferedReader)):
-            if not kwargs.get("file_name") and hasattr(data, "name"):
-                kwargs["file_name"] = os.path.basename(data.name)
-            return self._adapter.add(data=data, index=index, **kwargs)
-        # Case6: Unsupported data type
-        raise TypeError(f"Unsupported data type: {type(data)}")
-
-        if not isinstance(data, list):
-            raise TypeError(
-                f"Unsupported data type: {type(data)}. Only viking support file_path and file bytes"
+        # priority use index
+        if self.app_name and self.index:
+            logger.warning(
+                "`app_name` and `index` are both provided, using `index` as the knowledgebase index name."
             )
-        # not viking
-        return self._adapter.add(data=data, index=index, **kwargs)
 
-
-
+        # generate index name if `index` not provided but `app_name` is provided
+        if self.app_name and not self.index:
+            self.index = build_knowledgebase_index(self.app_name)
+            logger.info(
+                f"Knowledgebase index is set to {self.index} (generated by the app_name: {self.app_name})."
+            )
 
         logger.info(
-            f"
+            f"Initializing knowledgebase: backend={self.backend} top_k={self.top_k}"
         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        if
-
-            return self._adapter.list_docs(index=index, offset=offset, limit=limit)
-        else:
-            raise NotImplementedError(
-                f"list_docs not supported for {self.backend}, only viking support list_docs"
-            )
+        self._backend = _get_backend_cls(self.backend)(
+            index=self.index, **self.backend_config if self.backend_config else {}
+        )
+        logger.info(
+            f"Initialized knowledgebase with backend {self._backend.__class__.__name__}"
+        )
+
+    def add_from_directory(self, directory: str, **kwargs) -> bool:
+        """Add knowledge from file path to knowledgebase"""
+        return self._backend.add_from_directory(directory=directory, **kwargs)
+
+    def add_from_files(self, files: list[str], **kwargs) -> bool:
+        """Add knowledge (e.g, documents, strings, ...) to knowledgebase"""
+        return self._backend.add_from_files(files=files, **kwargs)
+
+    def add_from_text(self, text: str | list[str], **kwargs) -> bool:
+        """Add knowledge from text to knowledgebase"""
+        return self._backend.add_from_text(text=text, **kwargs)
+
+    def search(self, query: str, top_k: int = 0, **kwargs) -> list[KnowledgebaseEntry]:
+        """Search knowledge from knowledgebase"""
+        if top_k == 0:
+            top_k = self.top_k
 
-
-
-
+        _entries = self._backend.search(query=query, top_k=top_k, **kwargs)
+
+        entries = []
+        for entry in _entries:
+            if isinstance(entry, KnowledgebaseEntry):
+                entries.append(entry)
+            elif isinstance(entry, str):
+                entries.append(KnowledgebaseEntry(content=entry))
+            else:
+                logger.error(
+                    f"Unsupported entry type from backend search method: {type(entry)} with {entry}. Expected `KnowledgebaseEntry` or `str`. Skip for this entry."
+                )
+
+        return entries
+
+    def __getattr__(self, name) -> Callable:
+        """In case of knowledgebase have no backends' methods (`delete`, `list_chunks`, etc)
+
+        For example, knowledgebase.delete(...) -> self._backend.delete(...)
+        """
+        return getattr(self._backend, name)
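The hunk above replaces the old adapter-based `add()` with typed ingestion methods and a normalized `search()`. Below is a minimal usage sketch based only on the fields and methods visible in this diff; the app name and texts are illustrative, and the chosen backend still needs whatever embedding or service configuration it expects at runtime.

# Sketch of the 0.2.9 KnowledgeBase API shown in the hunk above (names are illustrative).
from veadk.knowledgebase.knowledgebase import KnowledgeBase

# With backend="local", data lives in memory; the index name is derived from
# app_name via build_knowledgebase_index() when `index` is not given.
kb = KnowledgeBase(backend="local", app_name="demo_app", top_k=5)

# Ingestion helpers delegate to the configured backend.
kb.add_from_text(["VeADK supports OpenSearch, VikingDB and Redis knowledge backends."])

# search() normalizes backend results into KnowledgebaseEntry objects.
for entry in kb.search("Which knowledge backends are supported?", top_k=3):
    print(entry.content)

# Anything not defined on KnowledgeBase itself (e.g. delete, list_chunks) is
# proxied to the backend through __getattr__, provided the backend implements it.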
veadk/memory/__init__.py
CHANGED
@@ -11,3 +11,25 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from veadk.memory.long_term_memory import LongTermMemory
+    from veadk.memory.short_term_memory import ShortTermMemory
+
+
+# Lazy loading for classes
+def __getattr__(name):
+    if name == "ShortTermMemory":
+        from veadk.memory.short_term_memory import ShortTermMemory
+
+        return ShortTermMemory
+    if name == "LongTeremMemory":
+        from veadk.memory.long_term_memory import LongTermMemory
+
+        return LongTermMemory
+    raise AttributeError(f"module 'veadk.memory' has no attribute '{name}'")
+
+
+__all__ = ["ShortTermMemory", "LongTermMemory"]
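The new `veadk/memory/__init__.py` uses a module-level `__getattr__` (PEP 562) so the memory modules are only imported on first attribute access. Note that the guard in this hunk checks the misspelled name `LongTeremMemory`, so resolving `LongTermMemory` through the package at runtime would appear to fall through to the `AttributeError` branch; importing from the submodule does not rely on the lazy hook. A short sketch of both access paths:

# Lazy attribute access via the package (handled by the module __getattr__ above).
from veadk.memory import ShortTermMemory

# Direct submodule import, which bypasses the lazy hook entirely.
from veadk.memory.long_term_memory import LongTermMemory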
veadk/memory/long_term_memory.py
CHANGED
@@ -25,45 +25,110 @@ from google.adk.memory.base_memory_service import (
 from google.adk.memory.memory_entry import MemoryEntry
 from google.adk.sessions import Session
 from google.genai import types
-from pydantic import BaseModel
-from typing_extensions import override
+from pydantic import BaseModel, Field
+from typing_extensions import Union, override
 
-from veadk.
-
+from veadk.memory.long_term_memory_backends.base_backend import (
+    BaseLongTermMemoryBackend,
+)
 from veadk.utils.logger import get_logger
 
 logger = get_logger(__name__)
 
 
+def _get_backend_cls(backend: str) -> type[BaseLongTermMemoryBackend]:
+    match backend:
+        case "local":
+            from veadk.memory.long_term_memory_backends.in_memory_backend import (
+                InMemoryLTMBackend,
+            )
+
+            return InMemoryLTMBackend
+        case "opensearch":
+            from veadk.memory.long_term_memory_backends.opensearch_backend import (
+                OpensearchLTMBackend,
+            )
+
+            return OpensearchLTMBackend
+        case "viking":
+            from veadk.memory.long_term_memory_backends.vikingdb_memory_backend import (
+                VikingDBLTMBackend,
+            )
+
+            return VikingDBLTMBackend
+        case "redis":
+            from veadk.memory.long_term_memory_backends.redis_backend import (
+                RedisLTMBackend,
+            )
+
+            return RedisLTMBackend
+        case "mem0":
+            from veadk.memory.long_term_memory_backends.mem0_backend import (
+                Mem0LTMBackend,
+            )
+
+            return Mem0LTMBackend
+
+    raise ValueError(f"Unsupported long term memory backend: {backend}")
+
+
 def build_long_term_memory_index(app_name: str, user_id: str):
     return f"{app_name}_{user_id}"
 
 
 class LongTermMemory(BaseMemoryService, BaseModel):
-    backend:
-        "local", "opensearch", "redis", "
+    backend: Union[
+        Literal["local", "opensearch", "redis", "viking", "viking_mem", "mem0"],
+        BaseLongTermMemoryBackend,
     ] = "opensearch"
+    """Long term memory backend type"""
+
+    backend_config: dict = Field(default_factory=dict)
+    """Long term memory backend configuration"""
+
     top_k: int = 5
+    """Number of top similar documents to retrieve during search."""
+
+    app_name: str = ""
+
+    user_id: str = ""
 
     def model_post_init(self, __context: Any) -> None:
-        if self.backend == "
+        if self.backend == "viking_mem":
             logger.warning(
-                "
+                "The `viking_mem` backend is deprecated, please use `viking` instead."
             )
-            self.backend = "
+            self.backend = "viking"
 
-
-            f"Initializing long term memory: backend={self.backend} top_k={self.top_k}"
-        )
+        self._backend = None
 
-
-
-
-
+        # Once user define a backend instance, use it directly
+        if isinstance(self.backend, BaseLongTermMemoryBackend):
+            self._backend = self.backend
+            logger.info(
+                f"Initialized long term memory with provided backend instance {self._backend.__class__.__name__}"
+            )
+            return
 
-
-
-
+        if self.backend_config:
+            logger.warning(
+                f"Initialized long term memory backend {self.backend} with config. We will ignore `app_name` and `user_id` if provided."
+            )
+            self._backend = _get_backend_cls(self.backend)(**self.backend_config)
+            return
+
+        if self.app_name and self.user_id:
+            self._index = build_long_term_memory_index(
+                app_name=self.app_name, user_id=self.user_id
+            )
+            logger.info(f"Long term memory index set to {self._index}.")
+            self._backend = _get_backend_cls(self.backend)(
+                index=self._index, **self.backend_config if self.backend_config else {}
+            )
+        else:
+            logger.warning(
+                "Neither `backend_instance`, `backend_config`, nor (`app_name`/`user_id`) is provided, the long term memory storage will initialize when adding a session."
+            )
 
     def _filter_and_convert_events(self, events: list[Event]) -> list[str]:
         final_events = []
@@ -91,40 +156,58 @@ class LongTermMemory(BaseMemoryService, BaseModel):
         self,
         session: Session,
     ):
+        app_name = session.app_name
+        user_id = session.user_id
+
+        if not self._backend and isinstance(self.backend, str):
+            self._index = build_long_term_memory_index(app_name, user_id)
+            self._backend = _get_backend_cls(self.backend)(
+                index=self._index, **self.backend_config if self.backend_config else {}
+            )
+            logger.info(
+                f"Initialize long term memory backend now, index is {self._index}"
+            )
+
+        if not self._index and self._index != build_long_term_memory_index(
+            app_name, user_id
+        ):
+            logger.warning(
+                f"The `app_name` or `user_id` is different from the initialized one, skip add session to memory. Initialized index: {self._index}, current built index: {build_long_term_memory_index(app_name, user_id)}"
+            )
+            return
         event_strings = self._filter_and_convert_events(session.events)
-        index = build_long_term_memory_index(session.app_name, session.user_id)
 
         logger.info(
-            f"Adding {len(event_strings)} events to long term memory: index={
+            f"Adding {len(event_strings)} events to long term memory: index={self._index}"
         )
 
-
-
-            self._adapter.add(data=event_strings, index=index, user_id=session.user_id)
-        else:
-            self._adapter.add(data=event_strings, index=index)
+        if self._backend:
+            self._backend.save_memory(event_strings=event_strings, user_id=user_id)
 
-
-
-
+            logger.info(
+                f"Added {len(event_strings)} events to long term memory: index={self._index}"
+            )
+        else:
+            logger.error(
+                "Long term memory backend initialize failed, cannot add session to memory."
+            )
 
     @override
     async def search_memory(self, *, app_name: str, user_id: str, query: str):
-
+        # prevent model invoke `load_memory` before add session to this memory
+        if not self._backend:
+            logger.error(
+                "Long term memory backend is not initialized, cannot search memory."
+            )
+            return SearchMemoryResponse(memories=[])
 
         logger.info(
-            f"Searching long term memory: query={query} index={
+            f"Searching long term memory: query={query} index={self._index} top_k={self.top_k}"
         )
 
-
-
-
-                query=query, index=index, top_k=self.top_k, user_id=user_id
-            )
-        else:
-            memory_chunks = self._adapter.query(
-                query=query, index=index, top_k=self.top_k
-            )
+        memory_chunks = self._backend.search_memory(
+            query=query, top_k=self.top_k, user_id=user_id
+        )
 
         memory_events = []
         for memory in memory_chunks:
@@ -152,6 +235,6 @@ class LongTermMemory(BaseMemoryService, BaseModel):
             )
 
         logger.info(
-            f"Return {len(memory_events)} memory events for query: {query} index={
+            f"Return {len(memory_events)} memory events for query: {query} index={self._index}"
         )
         return SearchMemoryResponse(memories=memory_events)
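The rewritten `model_post_init` above supports three initialization paths: a ready backend instance, a `backend_config` dict forwarded to the backend class, or `app_name`/`user_id` from which the index `f"{app_name}_{user_id}"` is built; with none of these, backend creation is deferred to `add_session_to_memory`. A sketch of those paths with illustrative values (the chosen backend still needs its own service or embedding configuration):

from veadk.memory.long_term_memory import LongTermMemory

# 1) app_name + user_id: index becomes "demo_app_user_1" and the backend class
#    selected by `backend` is constructed immediately.
ltm = LongTermMemory(backend="local", app_name="demo_app", user_id="user_1", top_k=5)

# 2) backend_config only: the dict is passed to the backend class as keyword
#    arguments (keys are backend-specific; this one is illustrative).
# ltm = LongTermMemory(backend="opensearch", backend_config={"index": "demo_app_user_1"})

# 3) Neither: initialization is deferred until add_session_to_memory() runs and
#    supplies app_name/user_id from the session.
# ltm = LongTermMemory(backend="viking")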
veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py}
CHANGED
@@ -13,33 +13,21 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from typing import Any
 
+from pydantic import BaseModel
 
-class DatabaseType:
-    LOCAL = "local"
-    RELATIONAL = "relational"
-    VECTOR = "vector"
-    KV = "kv"
 
+class BaseLongTermMemoryBackend(ABC, BaseModel):
+    index: str
 
-
-
-
-    Args:
-        type: type of the database
-
-    Note:
-        No `update` function support currently.
-    """
-
-    def __init__(self):
-        pass
-
-    def add(self, texts: list[Any], **kwargs: Any): ...
+    @abstractmethod
+    def precheck_index_naming(self):
+        """Check the index name is valid or not"""
 
     @abstractmethod
-    def
+    def save_memory(self, event_strings: list[str], **kwargs) -> bool:
+        """Save memory to long term memory backend"""
 
     @abstractmethod
-    def
+    def search_memory(self, query: str, top_k: int, **kwargs) -> list[str]:
+        """Retrieve memory from long term memory backend"""
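The renamed base class above reduces the old database interface to three abstract methods on a pydantic model. A hypothetical custom backend against that interface (the class name and list storage are illustrative, not part of veadk):

from veadk.memory.long_term_memory_backends.base_backend import (
    BaseLongTermMemoryBackend,
)


class ListLTMBackend(BaseLongTermMemoryBackend):
    """Toy backend keeping memories in a plain list (no embeddings)."""

    def model_post_init(self, __context) -> None:
        self._memories: list[str] = []

    def precheck_index_naming(self):
        # Accept any index name.
        pass

    def save_memory(self, event_strings: list[str], **kwargs) -> bool:
        self._memories.extend(event_strings)
        return True

    def search_memory(self, query: str, top_k: int, **kwargs) -> list[str]:
        # Naive substring match instead of vector retrieval.
        hits = [m for m in self._memories if query.lower() in m.lower()]
        return hits[:top_k]


# Such an instance can be handed to LongTermMemory(backend=ListLTMBackend(index="demo_app_user_1")),
# which matches the provided-instance branch in long_term_memory.py above.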
veadk/memory/long_term_memory_backends/in_memory_backend.py
ADDED
@@ -0,0 +1,65 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from llama_index.core import Document, VectorStoreIndex
+from llama_index.core.schema import BaseNode
+from llama_index.embeddings.openai_like import OpenAILikeEmbedding
+from pydantic import Field
+from typing_extensions import Any, override
+
+from veadk.configs.model_configs import EmbeddingModelConfig
+from veadk.knowledgebase.backends.utils import get_llama_index_splitter
+from veadk.memory.long_term_memory_backends.base_backend import (
+    BaseLongTermMemoryBackend,
+)
+
+
+class InMemoryLTMBackend(BaseLongTermMemoryBackend):
+    embedding_config: EmbeddingModelConfig = Field(default_factory=EmbeddingModelConfig)
+    """Embedding model configs"""
+
+    def precheck_index_naming(self):
+        # no checking
+        pass
+
+    def model_post_init(self, __context: Any) -> None:
+        self._embed_model = OpenAILikeEmbedding(
+            model_name=self.embedding_config.name,
+            api_key=self.embedding_config.api_key,
+            api_base=self.embedding_config.api_base,
+        )
+        self._vector_index = VectorStoreIndex([], embed_model=self._embed_model)
+
+    @override
+    def save_memory(self, event_strings: list[str], **kwargs) -> bool:
+        for event_string in event_strings:
+            document = Document(text=event_string)
+            nodes = self._split_documents([document])
+            self._vector_index.insert_nodes(nodes)
+        return True
+
+    @override
+    def search_memory(self, query: str, top_k: int, **kwargs) -> list[str]:
+        _retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
+        retrieved_nodes = _retriever.retrieve(query)
+        return [node.text for node in retrieved_nodes]
+
+    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
+        """Split document into chunks"""
+        nodes = []
+        for document in documents:
+            splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
+            _nodes = splitter.get_nodes_from_documents([document])
+            nodes.extend(_nodes)
+        return nodes
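InMemoryLTMBackend embeds each saved event string with an OpenAI-compatible embedding model and retrieves through llama-index's VectorStoreIndex. A usage sketch under the assumption that EmbeddingModelConfig() resolves a working embedding endpoint from the environment (the index name and strings are illustrative):

from veadk.memory.long_term_memory_backends.in_memory_backend import InMemoryLTMBackend

# embedding_config falls back to its default factory; credentials come from config/env.
backend = InMemoryLTMBackend(index="demo_app_user_1")

backend.save_memory(["User prefers concise answers.", "User is based in Shanghai."])
print(backend.search_memory("Where is the user located?", top_k=1))

# The same instance can also be passed as LongTermMemory(backend=backend), which
# takes the provided-instance branch shown earlier in long_term_memory.py.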