thoughtflow 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thoughtflow/__init__.py +97 -5
- thoughtflow/_util.py +752 -0
- thoughtflow/action.py +357 -0
- thoughtflow/agent.py +66 -0
- thoughtflow/eval/__init__.py +34 -0
- thoughtflow/eval/harness.py +200 -0
- thoughtflow/eval/replay.py +137 -0
- thoughtflow/llm.py +250 -0
- thoughtflow/memory/__init__.py +32 -0
- thoughtflow/memory/base.py +1658 -0
- thoughtflow/message.py +140 -0
- thoughtflow/py.typed +2 -0
- thoughtflow/thought.py +1102 -0
- thoughtflow/thoughtflow6.py +4180 -0
- thoughtflow/tools/__init__.py +27 -0
- thoughtflow/tools/base.py +145 -0
- thoughtflow/tools/registry.py +122 -0
- thoughtflow/trace/__init__.py +34 -0
- thoughtflow/trace/events.py +183 -0
- thoughtflow/trace/schema.py +111 -0
- thoughtflow/trace/session.py +141 -0
- thoughtflow-0.0.3.dist-info/METADATA +215 -0
- thoughtflow-0.0.3.dist-info/RECORD +25 -0
- {thoughtflow-0.0.1.dist-info → thoughtflow-0.0.3.dist-info}/WHEEL +1 -2
- {thoughtflow-0.0.1.dist-info → thoughtflow-0.0.3.dist-info/licenses}/LICENSE +1 -1
- thoughtflow/jtools1.py +0 -25
- thoughtflow/jtools2.py +0 -27
- thoughtflow-0.0.1.dist-info/METADATA +0 -17
- thoughtflow-0.0.1.dist-info/RECORD +0 -8
- thoughtflow-0.0.1.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,1658 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MEMORY class for ThoughtFlow.
|
|
3
|
+
|
|
4
|
+
The MEMORY class serves as an event-sourced state container for managing events,
|
|
5
|
+
logs, messages, reflections, and variables within the Thoughtflow framework.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import copy
|
|
12
|
+
import pickle
|
|
13
|
+
import pprint
|
|
14
|
+
import datetime as dtt
|
|
15
|
+
|
|
16
|
+
from thoughtflow._util import (
|
|
17
|
+
event_stamp,
|
|
18
|
+
VAR_DELETED,
|
|
19
|
+
compress_to_json,
|
|
20
|
+
decompress_from_json,
|
|
21
|
+
estimate_size,
|
|
22
|
+
is_obj_ref,
|
|
23
|
+
truncate_content,
|
|
24
|
+
tz_bog,
|
|
25
|
+
tz_utc,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class MEMORY:
|
|
30
|
+
"""
|
|
31
|
+
The MEMORY class serves as an event-sourced state container for managing events,
|
|
32
|
+
logs, messages, reflections, and variables within the Thoughtflow framework.
|
|
33
|
+
|
|
34
|
+
All state changes are stored as events with sortable IDs (alphabetical = chronological).
|
|
35
|
+
Events are stored in a dictionary for O(1) lookup, with separate sorted indexes for
|
|
36
|
+
efficient retrieval. The memory can be fully reconstructed from its event list.
|
|
37
|
+
|
|
38
|
+
Architecture:
|
|
39
|
+
- DATA LAYER: events dict (stamp → event object) - single source of truth
|
|
40
|
+
- INDEX LAYER: idx_* lists of [timestamp, stamp] pairs, sorted chronologically
|
|
41
|
+
- VARIABLE LAYER: vars dict with full history as list of [stamp, value] pairs
|
|
42
|
+
- OBJECT LAYER: objects dict for compressed large data storage
|
|
43
|
+
|
|
44
|
+
Attributes:
|
|
45
|
+
id (str): Unique identifier for this MEMORY instance (event_stamp).
|
|
46
|
+
events (dict): Dictionary mapping event stamps to full event objects.
|
|
47
|
+
idx_msgs (list): Sorted list of [timestamp, stamp] pairs for messages.
|
|
48
|
+
idx_refs (list): Sorted list of [timestamp, stamp] pairs for reflections.
|
|
49
|
+
idx_logs (list): Sorted list of [timestamp, stamp] pairs for logs.
|
|
50
|
+
idx_vars (list): Sorted list of [timestamp, stamp] pairs for variable changes.
|
|
51
|
+
idx_all (list): Master sorted list of all [timestamp, stamp] pairs.
|
|
52
|
+
vars (dict): Dictionary mapping variable names to list of [stamp, value] pairs.
|
|
53
|
+
Deleted variables have VAR_DELETED as the value in their last entry.
|
|
54
|
+
Large values auto-convert to object references: {'_obj_ref': stamp}.
|
|
55
|
+
var_desc_history (dict): Dictionary mapping variable names to list of [stamp, description] pairs.
|
|
56
|
+
Tracks description evolution separately from value changes.
|
|
57
|
+
objects (dict): Dictionary mapping stamps to compressed object dicts.
|
|
58
|
+
Each object is JSON-serializable with base64-encoded compressed data.
|
|
59
|
+
object_threshold (int): Size threshold (bytes) for auto-converting vars to objects.
|
|
60
|
+
valid_roles (set): Set of valid roles for messages.
|
|
61
|
+
valid_modes (set): Set of valid modes for messages.
|
|
62
|
+
valid_channels (set): Set of valid communication channels.
|
|
63
|
+
|
|
64
|
+
Methods:
|
|
65
|
+
add_msg(role, content, mode='text', channel='unknown'): Add a message event with channel.
|
|
66
|
+
add_log(message): Add a log event.
|
|
67
|
+
add_ref(content): Add a reflection event.
|
|
68
|
+
get_msgs(...): Retrieve messages with filtering (supports channel filter).
|
|
69
|
+
get_events(...): Retrieve all events with filtering.
|
|
70
|
+
get_logs(limit=-1): Get log events.
|
|
71
|
+
get_refs(limit=-1): Get reflection events.
|
|
72
|
+
last_user_msg(): Get the last user message content.
|
|
73
|
+
last_asst_msg(): Get the last assistant message content.
|
|
74
|
+
last_sys_msg(): Get the last system message content.
|
|
75
|
+
last_log_msg(): Get the last log message content.
|
|
76
|
+
prepare_context(...): Prepare messages for LLM with smart truncation of old messages.
|
|
77
|
+
set_var(key, value, desc=''): Set a variable (appends to history, auto-converts large values to objects).
|
|
78
|
+
del_var(key): Mark a variable as deleted (preserves history).
|
|
79
|
+
get_var(key, resolve_refs=True): Get current value (auto-resolves object refs).
|
|
80
|
+
get_all_vars(resolve_refs=True): Get dict of all current non-deleted values.
|
|
81
|
+
get_var_history(key, resolve_refs=False): Get full history as list of [stamp, value].
|
|
82
|
+
get_var_desc(key): Get the current description of a variable.
|
|
83
|
+
get_var_desc_history(key): Get full description history as list of [stamp, description].
|
|
84
|
+
is_var_deleted(key): Check if a variable is currently marked as deleted.
|
|
85
|
+
set_obj(data, name=None, desc='', content_type='auto'): Store compressed object, optionally link to variable.
|
|
86
|
+
get_obj(stamp): Retrieve and decompress an object by stamp.
|
|
87
|
+
get_obj_info(stamp): Get object metadata without decompressing.
|
|
88
|
+
snapshot(): Export memory state as dict (includes events and objects).
|
|
89
|
+
save(filename, compressed=False): Save memory to file (pickle format).
|
|
90
|
+
load(filename, compressed=False): Load memory from file (pickle format).
|
|
91
|
+
to_json(filename=None, indent=2): Export memory to JSON file or string.
|
|
92
|
+
from_json(source): Class method to load memory from JSON file or string.
|
|
93
|
+
copy(): Return a deep copy of the MEMORY instance.
|
|
94
|
+
from_events(event_list, memory_id=None, objects=None): Class method to rehydrate from events/objects.
|
|
95
|
+
|
|
96
|
+
Example Usage:
|
|
97
|
+
memory = MEMORY()
|
|
98
|
+
|
|
99
|
+
# Messages have channel tracking (for omni-directional communication)
|
|
100
|
+
memory.add_msg('user', 'Hello!', channel='webapp')
|
|
101
|
+
memory.add_msg('assistant', 'Hi there!', channel='webapp')
|
|
102
|
+
|
|
103
|
+
# Logs and reflections are internal (no channel)
|
|
104
|
+
memory.add_log('User greeted the assistant')
|
|
105
|
+
memory.add_ref('User seems friendly')
|
|
106
|
+
|
|
107
|
+
# Variables maintain full history (no channel needed)
|
|
108
|
+
memory.set_var('foo', 42, 'A test variable')
|
|
109
|
+
memory.set_var('foo', 100) # Appends to history
|
|
110
|
+
memory.get_var('foo') # Returns 100
|
|
111
|
+
memory.get_var_history('foo') # Returns [[stamp1, 42], [stamp2, 100]]
|
|
112
|
+
|
|
113
|
+
# Deletion is a tombstone, not removal
|
|
114
|
+
memory.del_var('foo')
|
|
115
|
+
memory.get_var('foo') # Returns None
|
|
116
|
+
memory.is_var_deleted('foo') # Returns True
|
|
117
|
+
memory.set_var('foo', 200) # Can re-set after deletion
|
|
118
|
+
|
|
119
|
+
# Large values auto-convert to compressed objects
|
|
120
|
+
large_data = 'x' * 20000 # Exceeds default 10KB threshold
|
|
121
|
+
memory.set_var('big_data', large_data) # Auto-converts to object
|
|
122
|
+
memory.get_var('big_data') # Returns decompressed data
|
|
123
|
+
memory.get_var('big_data', resolve_refs=False) # Returns {'_obj_ref': stamp}
|
|
124
|
+
|
|
125
|
+
# Direct object storage
|
|
126
|
+
stamp = memory.set_obj(image_bytes, name='avatar', desc='User avatar')
|
|
127
|
+
memory.get_var('avatar') # Returns decompressed image_bytes
|
|
128
|
+
memory.get_obj(stamp) # Direct access by stamp
|
|
129
|
+
memory.get_obj_info(stamp) # Metadata without decompressing
|
|
130
|
+
|
|
131
|
+
# Inspect internal state (public attributes)
|
|
132
|
+
print(memory.events) # All events by stamp
|
|
133
|
+
print(memory.objects) # All objects by stamp
|
|
134
|
+
print(memory.vars) # Variable histories
|
|
135
|
+
|
|
136
|
+
memory.save('memory.pkl')
|
|
137
|
+
memory2 = MEMORY()
|
|
138
|
+
memory2.load('memory.pkl')
|
|
139
|
+
|
|
140
|
+
# Export to JSON (like DataFrame.to_csv)
|
|
141
|
+
memory.to_json('memory_backup.json')
|
|
142
|
+
memory4 = MEMORY.from_json('memory_backup.json')
|
|
143
|
+
|
|
144
|
+
# Rehydrate from events and objects (preserves all history)
|
|
145
|
+
snap = memory.snapshot()
|
|
146
|
+
memory3 = MEMORY.from_events(snap['events'].values(), objects=snap['objects'])
|
|
147
|
+
"""
|
|
148
|
+
|
|
149
|
+
def __init__(self):
|
|
150
|
+
import bisect
|
|
151
|
+
self._bisect = bisect # Store for use in methods
|
|
152
|
+
|
|
153
|
+
self.id = event_stamp()
|
|
154
|
+
|
|
155
|
+
# DATA LAYER: Single source of truth for all events
|
|
156
|
+
self.events = {} # stamp → full event dict
|
|
157
|
+
|
|
158
|
+
# INDEX LAYER: Sorted lists of [timestamp, stamp] pairs
|
|
159
|
+
# Format: [[dt_utc, stamp], ...] - aligns with Redis sorted set structure
|
|
160
|
+
# Sorted by timestamp (ISO string sorts chronologically)
|
|
161
|
+
self.idx_msgs = [] # Message [timestamp, stamp] pairs
|
|
162
|
+
self.idx_refs = [] # Reflection [timestamp, stamp] pairs
|
|
163
|
+
self.idx_logs = [] # Log [timestamp, stamp] pairs
|
|
164
|
+
self.idx_vars = [] # Variable-change [timestamp, stamp] pairs
|
|
165
|
+
self.idx_all = [] # Master index (all [timestamp, stamp] pairs)
|
|
166
|
+
|
|
167
|
+
# VARIABLE LAYER: Full history with timestamps
|
|
168
|
+
# vars[key] = [[stamp1, value1], [stamp2, value2], ...]
|
|
169
|
+
# Deleted variables have VAR_DELETED as value in their last entry
|
|
170
|
+
self.vars = {} # var_name → list of [stamp, value] pairs
|
|
171
|
+
self.var_desc_history = {} # var_name → list of [stamp, description] pairs
|
|
172
|
+
|
|
173
|
+
# OBJECT LAYER: Compressed storage for large data
|
|
174
|
+
# objects[stamp] = {
|
|
175
|
+
# 'data': base64_encoded_compressed_string,
|
|
176
|
+
# 'size_original': int,
|
|
177
|
+
# 'size_compressed': int,
|
|
178
|
+
# 'content_type': str, # 'bytes', 'text', 'json', 'pickle'
|
|
179
|
+
# }
|
|
180
|
+
self.objects = {} # stamp → compressed object dict
|
|
181
|
+
|
|
182
|
+
# Threshold for auto-converting variables to objects (bytes)
|
|
183
|
+
self.object_threshold = 10000 # 10KB default
|
|
184
|
+
|
|
185
|
+
# Valid values
|
|
186
|
+
self.valid_roles = {
|
|
187
|
+
'system',
|
|
188
|
+
'user',
|
|
189
|
+
'assistant',
|
|
190
|
+
'reflection',
|
|
191
|
+
'action',
|
|
192
|
+
'query',
|
|
193
|
+
'result',
|
|
194
|
+
'logger',
|
|
195
|
+
}
|
|
196
|
+
self.valid_modes = {
|
|
197
|
+
'text',
|
|
198
|
+
'audio',
|
|
199
|
+
'voice',
|
|
200
|
+
}
|
|
201
|
+
self.valid_channels = {
|
|
202
|
+
'webapp',
|
|
203
|
+
'ios',
|
|
204
|
+
'android',
|
|
205
|
+
'telegram',
|
|
206
|
+
'whatsapp',
|
|
207
|
+
'slack',
|
|
208
|
+
'api',
|
|
209
|
+
'cli',
|
|
210
|
+
'unknown',
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
#--- Internal Methods ---
|
|
214
|
+
|
|
215
|
+
def _add_to_index(self, index_list, timestamp, stamp):
|
|
216
|
+
"""
|
|
217
|
+
Insert [timestamp, stamp] pair maintaining sorted order by timestamp.
|
|
218
|
+
|
|
219
|
+
Args:
|
|
220
|
+
index_list: One of the idx_* lists
|
|
221
|
+
timestamp: ISO timestamp string (dt_utc)
|
|
222
|
+
stamp: Event stamp ID
|
|
223
|
+
"""
|
|
224
|
+
# bisect.insort sorts by first element of tuple/list (timestamp)
|
|
225
|
+
self._bisect.insort(index_list, [timestamp, stamp])
|
|
226
|
+
|
|
227
|
+
def _store_event(self, event_type, obj):
|
|
228
|
+
"""
|
|
229
|
+
Store event in data layer and add to appropriate indexes.
|
|
230
|
+
This is the single entry point for all event creation.
|
|
231
|
+
|
|
232
|
+
Args:
|
|
233
|
+
event_type: One of 'msg', 'ref', 'log', 'var'
|
|
234
|
+
obj: The full event dict (must contain 'stamp' and 'dt_utc' keys)
|
|
235
|
+
"""
|
|
236
|
+
stamp = obj['stamp']
|
|
237
|
+
timestamp = obj['dt_utc']
|
|
238
|
+
|
|
239
|
+
# Store in data layer
|
|
240
|
+
self.events[stamp] = obj
|
|
241
|
+
|
|
242
|
+
# Add to type-specific index (with [timestamp, stamp] format)
|
|
243
|
+
if event_type == 'msg':
|
|
244
|
+
self._add_to_index(self.idx_msgs, timestamp, stamp)
|
|
245
|
+
elif event_type == 'ref':
|
|
246
|
+
self._add_to_index(self.idx_refs, timestamp, stamp)
|
|
247
|
+
elif event_type == 'log':
|
|
248
|
+
self._add_to_index(self.idx_logs, timestamp, stamp)
|
|
249
|
+
elif event_type == 'var':
|
|
250
|
+
self._add_to_index(self.idx_vars, timestamp, stamp)
|
|
251
|
+
|
|
252
|
+
# Always add to master index
|
|
253
|
+
self._add_to_index(self.idx_all, timestamp, stamp)
|
|
254
|
+
|
|
255
|
+
def _get_events_from_index(self, index, limit=-1):
|
|
256
|
+
"""
|
|
257
|
+
Get events from an index, optionally limited to last N.
|
|
258
|
+
|
|
259
|
+
Args:
|
|
260
|
+
index: One of the idx_* lists (format: [[timestamp, stamp], ...])
|
|
261
|
+
limit: Max events to return (-1 = all)
|
|
262
|
+
|
|
263
|
+
Returns:
|
|
264
|
+
List of event dicts
|
|
265
|
+
"""
|
|
266
|
+
pairs = index if limit <= 0 else index[-limit:]
|
|
267
|
+
# Extract stamp (second element) from each [timestamp, stamp] pair
|
|
268
|
+
return [self.events[ts_stamp[1]] for ts_stamp in pairs if ts_stamp[1] in self.events]
|
|
269
|
+
|
|
270
|
+
def _get_latest_desc(self, key):
|
|
271
|
+
"""
|
|
272
|
+
Get the latest description for a variable from its description history.
|
|
273
|
+
|
|
274
|
+
Args:
|
|
275
|
+
key: Variable name
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
Latest description string, or empty string if none exists
|
|
279
|
+
"""
|
|
280
|
+
history = self.var_desc_history.get(key)
|
|
281
|
+
if not history:
|
|
282
|
+
return ''
|
|
283
|
+
return history[-1][1] # Return description from last [stamp, desc] pair
|
|
284
|
+
|
|
285
|
+
#--- Public Methods ---
|
|
286
|
+
|
|
287
|
+
def add_msg(self, role, content, mode='text', channel='unknown'):
|
|
288
|
+
"""
|
|
289
|
+
Add a message event with channel tracking.
|
|
290
|
+
|
|
291
|
+
Args:
|
|
292
|
+
role: Message role (user, assistant, system, etc.)
|
|
293
|
+
content: Message content
|
|
294
|
+
mode: Communication mode (text, audio, voice)
|
|
295
|
+
channel: Communication channel (webapp, ios, telegram, etc.)
|
|
296
|
+
"""
|
|
297
|
+
if role not in self.valid_roles:
|
|
298
|
+
raise ValueError("Invalid role '{}'. Must be one of: {}".format(role, sorted(self.valid_roles)))
|
|
299
|
+
if mode not in self.valid_modes:
|
|
300
|
+
raise ValueError("Invalid mode '{}'. Must be one of: {}".format(mode, sorted(self.valid_modes)))
|
|
301
|
+
if channel not in self.valid_channels:
|
|
302
|
+
raise ValueError("Invalid channel '{}'. Must be one of: {}".format(channel, sorted(self.valid_channels)))
|
|
303
|
+
|
|
304
|
+
stamp = event_stamp({'role': role, 'content': content})
|
|
305
|
+
msg = {
|
|
306
|
+
'stamp' : stamp,
|
|
307
|
+
'type' : 'msg',
|
|
308
|
+
'role' : role,
|
|
309
|
+
'content' : content,
|
|
310
|
+
'mode' : mode,
|
|
311
|
+
'channel' : channel,
|
|
312
|
+
'dt_bog' : str(dtt.datetime.now(tz_bog))[:23],
|
|
313
|
+
'dt_utc' : str(dtt.datetime.now(tz_utc))[:23],
|
|
314
|
+
}
|
|
315
|
+
self._store_event('msg', msg)
|
|
316
|
+
|
|
317
|
+
def add_log(self, message):
|
|
318
|
+
"""
|
|
319
|
+
Add a log event.
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
message: Log message content
|
|
323
|
+
"""
|
|
324
|
+
stamp = event_stamp({'content': message})
|
|
325
|
+
log_entry = {
|
|
326
|
+
'stamp' : stamp,
|
|
327
|
+
'type' : 'log',
|
|
328
|
+
'role' : 'logger',
|
|
329
|
+
'content' : message,
|
|
330
|
+
'mode' : 'text',
|
|
331
|
+
'dt_bog' : str(dtt.datetime.now(tz_bog))[:23],
|
|
332
|
+
'dt_utc' : str(dtt.datetime.now(tz_utc))[:23],
|
|
333
|
+
}
|
|
334
|
+
self._store_event('log', log_entry)
|
|
335
|
+
|
|
336
|
+
def add_ref(self, content):
|
|
337
|
+
"""
|
|
338
|
+
Add a reflection event.
|
|
339
|
+
|
|
340
|
+
Args:
|
|
341
|
+
content: Reflection content
|
|
342
|
+
"""
|
|
343
|
+
stamp = event_stamp({'content': content})
|
|
344
|
+
ref = {
|
|
345
|
+
'stamp' : stamp,
|
|
346
|
+
'type' : 'ref',
|
|
347
|
+
'role' : 'reflection',
|
|
348
|
+
'content' : content,
|
|
349
|
+
'mode' : 'text',
|
|
350
|
+
'dt_bog' : str(dtt.datetime.now(tz_bog))[:23],
|
|
351
|
+
'dt_utc' : str(dtt.datetime.now(tz_utc))[:23],
|
|
352
|
+
}
|
|
353
|
+
self._store_event('ref', ref)
|
|
354
|
+
|
|
355
|
+
#---
|
|
356
|
+
|
|
357
|
+
def get_msgs(self,
|
|
358
|
+
limit=-1,
|
|
359
|
+
include=None,
|
|
360
|
+
exclude=None,
|
|
361
|
+
repr='list',
|
|
362
|
+
channel=None,
|
|
363
|
+
):
|
|
364
|
+
"""
|
|
365
|
+
Get messages with flexible filtering.
|
|
366
|
+
|
|
367
|
+
Args:
|
|
368
|
+
limit: Max messages to return (-1 = all)
|
|
369
|
+
include: List of roles to include (None = all)
|
|
370
|
+
exclude: List of roles to exclude (None = none)
|
|
371
|
+
repr: Output format ('list', 'str', 'pprint1')
|
|
372
|
+
channel: Filter by channel (None = all)
|
|
373
|
+
|
|
374
|
+
Returns:
|
|
375
|
+
Messages in the specified format
|
|
376
|
+
"""
|
|
377
|
+
# Get all messages from index
|
|
378
|
+
events = self._get_events_from_index(self.idx_msgs, -1)
|
|
379
|
+
|
|
380
|
+
# Apply filters
|
|
381
|
+
if include:
|
|
382
|
+
events = [e for e in events if e.get('role') in include]
|
|
383
|
+
if exclude:
|
|
384
|
+
exclude = exclude or []
|
|
385
|
+
events = [e for e in events if e.get('role') not in exclude]
|
|
386
|
+
if channel:
|
|
387
|
+
events = [e for e in events if e.get('channel') == channel]
|
|
388
|
+
|
|
389
|
+
if limit > 0:
|
|
390
|
+
events = events[-limit:]
|
|
391
|
+
|
|
392
|
+
if repr == 'list':
|
|
393
|
+
return events
|
|
394
|
+
elif repr == 'str':
|
|
395
|
+
return '\n'.join(["{}: {}".format(e['role'], e['content']) for e in events])
|
|
396
|
+
elif repr == 'pprint1':
|
|
397
|
+
return pprint.pformat(events, indent=1)
|
|
398
|
+
else:
|
|
399
|
+
raise ValueError("Invalid repr option. Choose from 'list', 'str', or 'pprint1'.")
|
|
400
|
+
|
|
401
|
+
def get_events(self, limit=-1, event_types=None, channel=None):
|
|
402
|
+
"""
|
|
403
|
+
Get all events, optionally filtered by type and channel.
|
|
404
|
+
|
|
405
|
+
Args:
|
|
406
|
+
limit: Max events (-1 = all)
|
|
407
|
+
event_types: List like ['msg', 'log', 'ref', 'var'] (None = all)
|
|
408
|
+
channel: Filter by channel (None = all)
|
|
409
|
+
|
|
410
|
+
Returns:
|
|
411
|
+
List of event dicts
|
|
412
|
+
"""
|
|
413
|
+
events = self._get_events_from_index(self.idx_all, -1)
|
|
414
|
+
|
|
415
|
+
if event_types:
|
|
416
|
+
events = [e for e in events if e.get('type') in event_types]
|
|
417
|
+
if channel:
|
|
418
|
+
events = [e for e in events if e.get('channel') == channel]
|
|
419
|
+
|
|
420
|
+
if limit > 0:
|
|
421
|
+
events = events[-limit:]
|
|
422
|
+
|
|
423
|
+
return events
|
|
424
|
+
|
|
425
|
+
def get_logs(self, limit=-1):
|
|
426
|
+
"""
|
|
427
|
+
Get log events.
|
|
428
|
+
|
|
429
|
+
Args:
|
|
430
|
+
limit: Max logs to return (-1 = all)
|
|
431
|
+
|
|
432
|
+
Returns:
|
|
433
|
+
List of log event dicts
|
|
434
|
+
"""
|
|
435
|
+
events = self._get_events_from_index(self.idx_logs, -1)
|
|
436
|
+
|
|
437
|
+
if limit > 0:
|
|
438
|
+
events = events[-limit:]
|
|
439
|
+
|
|
440
|
+
return events
|
|
441
|
+
|
|
442
|
+
def get_refs(self, limit=-1):
|
|
443
|
+
"""
|
|
444
|
+
Get reflection events.
|
|
445
|
+
|
|
446
|
+
Args:
|
|
447
|
+
limit: Max reflections to return (-1 = all)
|
|
448
|
+
|
|
449
|
+
Returns:
|
|
450
|
+
List of reflection event dicts
|
|
451
|
+
"""
|
|
452
|
+
events = self._get_events_from_index(self.idx_refs, -1)
|
|
453
|
+
|
|
454
|
+
if limit > 0:
|
|
455
|
+
events = events[-limit:]
|
|
456
|
+
|
|
457
|
+
return events
|
|
458
|
+
|
|
459
|
+
def last_user_msg(self):
|
|
460
|
+
"""Get the content of the last user message."""
|
|
461
|
+
msgs = self.get_msgs(include=['user'])
|
|
462
|
+
return msgs[-1]['content'] if msgs else ''
|
|
463
|
+
|
|
464
|
+
def last_asst_msg(self):
|
|
465
|
+
"""Get the content of the last assistant message."""
|
|
466
|
+
msgs = self.get_msgs(include=['assistant'])
|
|
467
|
+
return msgs[-1]['content'] if msgs else ''
|
|
468
|
+
|
|
469
|
+
def last_sys_msg(self):
|
|
470
|
+
"""Get the content of the last system message."""
|
|
471
|
+
msgs = self.get_msgs(include=['system'])
|
|
472
|
+
return msgs[-1]['content'] if msgs else ''
|
|
473
|
+
|
|
474
|
+
def last_log_msg(self):
|
|
475
|
+
"""Get the content of the last log message."""
|
|
476
|
+
logs = self.get_logs()
|
|
477
|
+
return logs[-1]['content'] if logs else ''
|
|
478
|
+
|
|
479
|
+
def prepare_context(
|
|
480
|
+
self,
|
|
481
|
+
recent_count=6,
|
|
482
|
+
truncate_threshold=500,
|
|
483
|
+
header_len=200,
|
|
484
|
+
footer_len=200,
|
|
485
|
+
include_roles=('user', 'assistant'),
|
|
486
|
+
format='list',
|
|
487
|
+
):
|
|
488
|
+
"""
|
|
489
|
+
Prepare messages for LLM context with smart truncation of old messages.
|
|
490
|
+
|
|
491
|
+
Messages within the most recent `recent_count` are returned unchanged.
|
|
492
|
+
Older messages that exceed `truncate_threshold` chars have their middle
|
|
493
|
+
content truncated, preserving a header and footer with an expandable marker.
|
|
494
|
+
|
|
495
|
+
The truncation marker includes the message's stamp, allowing an LLM to
|
|
496
|
+
request expansion of specific messages via memory.events[stamp].
|
|
497
|
+
|
|
498
|
+
Args:
|
|
499
|
+
recent_count: Number of recent messages to keep untruncated (default 6)
|
|
500
|
+
truncate_threshold: Min chars before truncation applies (default 500)
|
|
501
|
+
header_len: Characters to keep from start (default 200)
|
|
502
|
+
footer_len: Characters to keep from end (default 200)
|
|
503
|
+
include_roles: Tuple of roles to include (default ('user', 'assistant'))
|
|
504
|
+
format: 'list' returns list of dicts, 'openai' returns OpenAI-compatible format
|
|
505
|
+
|
|
506
|
+
Returns:
|
|
507
|
+
List of message dicts with 'role' and 'content' keys.
|
|
508
|
+
Older messages may have truncated content with expansion markers.
|
|
509
|
+
|
|
510
|
+
Example:
|
|
511
|
+
# Get context-ready messages for LLM
|
|
512
|
+
context = memory.prepare_context(recent_count=6, truncate_threshold=500)
|
|
513
|
+
|
|
514
|
+
# Use with OpenAI API
|
|
515
|
+
context = memory.prepare_context(format='openai')
|
|
516
|
+
response = client.chat.completions.create(
|
|
517
|
+
model='gpt-4',
|
|
518
|
+
messages=context
|
|
519
|
+
)
|
|
520
|
+
"""
|
|
521
|
+
# Get all messages for included roles
|
|
522
|
+
msgs = self.get_msgs(include=list(include_roles))
|
|
523
|
+
|
|
524
|
+
if not msgs:
|
|
525
|
+
return []
|
|
526
|
+
|
|
527
|
+
# Determine cutoff point for truncation
|
|
528
|
+
# Messages at index < cutoff_idx are candidates for truncation
|
|
529
|
+
cutoff_idx = max(0, len(msgs) - recent_count)
|
|
530
|
+
|
|
531
|
+
result = []
|
|
532
|
+
for i, msg in enumerate(msgs):
|
|
533
|
+
stamp = msg.get('stamp', '')
|
|
534
|
+
role = msg.get('role', 'user')
|
|
535
|
+
content = msg.get('content', '')
|
|
536
|
+
|
|
537
|
+
# Apply truncation to older messages
|
|
538
|
+
if i < cutoff_idx:
|
|
539
|
+
content = truncate_content(
|
|
540
|
+
content,
|
|
541
|
+
stamp,
|
|
542
|
+
threshold=truncate_threshold,
|
|
543
|
+
header_len=header_len,
|
|
544
|
+
footer_len=footer_len
|
|
545
|
+
)
|
|
546
|
+
|
|
547
|
+
if format == 'openai':
|
|
548
|
+
# OpenAI expects 'user', 'assistant', 'system' roles
|
|
549
|
+
result.append({'role': role, 'content': content})
|
|
550
|
+
else:
|
|
551
|
+
# List format includes more metadata
|
|
552
|
+
result.append({
|
|
553
|
+
'role': role,
|
|
554
|
+
'content': content,
|
|
555
|
+
'stamp': stamp,
|
|
556
|
+
'truncated': i < cutoff_idx and len(msg.get('content', '')) > truncate_threshold,
|
|
557
|
+
})
|
|
558
|
+
|
|
559
|
+
return result
|
|
560
|
+
|
|
561
|
+
#---
|
|
562
|
+
|
|
563
|
+
def set_var(self, key, value, desc=''):
|
|
564
|
+
"""
|
|
565
|
+
Store a variable by appending to its history list.
|
|
566
|
+
Variable changes are first-class events in the event stream.
|
|
567
|
+
Each variable maintains a full history of [stamp, value] pairs.
|
|
568
|
+
|
|
569
|
+
Large values (exceeding object_threshold) are automatically converted
|
|
570
|
+
to compressed objects, with an object reference stored in the history.
|
|
571
|
+
|
|
572
|
+
Descriptions are tracked separately in var_desc_history since they
|
|
573
|
+
change less frequently than values.
|
|
574
|
+
|
|
575
|
+
Args:
|
|
576
|
+
key: Variable name
|
|
577
|
+
value: Variable value (any type)
|
|
578
|
+
desc: Optional description (appended to description history if provided)
|
|
579
|
+
"""
|
|
580
|
+
# Check if value should be stored as object (auto-conversion)
|
|
581
|
+
value_size = estimate_size(value)
|
|
582
|
+
if value_size > self.object_threshold:
|
|
583
|
+
# Store as object, use reference in history
|
|
584
|
+
obj_stamp = event_stamp({'obj': str(value)[:50]})
|
|
585
|
+
compressed_obj = compress_to_json(value)
|
|
586
|
+
self.objects[obj_stamp] = compressed_obj
|
|
587
|
+
stored_value = {'_obj_ref': obj_stamp}
|
|
588
|
+
else:
|
|
589
|
+
stored_value = value
|
|
590
|
+
|
|
591
|
+
stamp = event_stamp({'var': key, 'value': str(value)[:100]})
|
|
592
|
+
|
|
593
|
+
# Initialize history list if this is a new variable
|
|
594
|
+
if key not in self.vars:
|
|
595
|
+
self.vars[key] = []
|
|
596
|
+
|
|
597
|
+
# Append new [stamp, stored_value] pair to history
|
|
598
|
+
self.vars[key].append([stamp, stored_value])
|
|
599
|
+
|
|
600
|
+
# Track description changes separately (only when provided)
|
|
601
|
+
if desc:
|
|
602
|
+
if key not in self.var_desc_history:
|
|
603
|
+
self.var_desc_history[key] = []
|
|
604
|
+
self.var_desc_history[key].append([stamp, desc])
|
|
605
|
+
|
|
606
|
+
# Get latest description from history (or the one we just set)
|
|
607
|
+
current_desc = desc if desc else self._get_latest_desc(key)
|
|
608
|
+
|
|
609
|
+
# Create variable-change event
|
|
610
|
+
var_event = {
|
|
611
|
+
'stamp' : stamp,
|
|
612
|
+
'type' : 'var',
|
|
613
|
+
'role' : 'system',
|
|
614
|
+
'var_name' : key,
|
|
615
|
+
'var_value': stored_value, # Store reference if large, else value
|
|
616
|
+
'var_desc' : current_desc,
|
|
617
|
+
'content' : "Variable '{}' set".format(key) + (' (as object ref)' if is_obj_ref(stored_value) else ''),
|
|
618
|
+
'mode' : 'text',
|
|
619
|
+
'dt_bog' : str(dtt.datetime.now(tz_bog))[:23],
|
|
620
|
+
'dt_utc' : str(dtt.datetime.now(tz_utc))[:23],
|
|
621
|
+
}
|
|
622
|
+
self._store_event('var', var_event)
|
|
623
|
+
|
|
624
|
+
def del_var(self, key):
|
|
625
|
+
"""
|
|
626
|
+
Mark a variable as deleted by appending a VAR_DELETED tombstone.
|
|
627
|
+
The variable's history is preserved; it can be re-set later.
|
|
628
|
+
|
|
629
|
+
Args:
|
|
630
|
+
key: Variable name to delete
|
|
631
|
+
|
|
632
|
+
Raises:
|
|
633
|
+
KeyError: If the variable doesn't exist
|
|
634
|
+
"""
|
|
635
|
+
if key not in self.vars:
|
|
636
|
+
raise KeyError("Variable '{}' does not exist".format(key))
|
|
637
|
+
|
|
638
|
+
stamp = event_stamp({'var': key, 'action': 'delete'})
|
|
639
|
+
|
|
640
|
+
# Append deletion marker to history
|
|
641
|
+
self.vars[key].append([stamp, VAR_DELETED])
|
|
642
|
+
|
|
643
|
+
# Create variable-delete event
|
|
644
|
+
var_event = {
|
|
645
|
+
'stamp' : stamp,
|
|
646
|
+
'type' : 'var',
|
|
647
|
+
'role' : 'system',
|
|
648
|
+
'var_name' : key,
|
|
649
|
+
'var_value': None,
|
|
650
|
+
'var_deleted': True,
|
|
651
|
+
'var_desc' : self._get_latest_desc(key),
|
|
652
|
+
'content' : "Variable '{}' deleted".format(key),
|
|
653
|
+
'mode' : 'text',
|
|
654
|
+
'dt_bog' : str(dtt.datetime.now(tz_bog))[:23],
|
|
655
|
+
'dt_utc' : str(dtt.datetime.now(tz_utc))[:23],
|
|
656
|
+
}
|
|
657
|
+
self._store_event('var', var_event)
|
|
658
|
+
|
|
659
|
+
def get_var(self, key, resolve_refs=True):
|
|
660
|
+
"""
|
|
661
|
+
Return the current value of a variable.
|
|
662
|
+
|
|
663
|
+
If the value is an object reference, it is automatically resolved
|
|
664
|
+
and the decompressed data is returned (unless resolve_refs=False).
|
|
665
|
+
|
|
666
|
+
Args:
|
|
667
|
+
key: Variable name
|
|
668
|
+
resolve_refs: If True (default), resolve object references to actual data
|
|
669
|
+
|
|
670
|
+
Returns:
|
|
671
|
+
Current value, or None if not found or deleted
|
|
672
|
+
"""
|
|
673
|
+
history = self.vars.get(key)
|
|
674
|
+
if not history:
|
|
675
|
+
return None
|
|
676
|
+
|
|
677
|
+
# Get the last value
|
|
678
|
+
last_stamp, last_value = history[-1]
|
|
679
|
+
|
|
680
|
+
# Return None if deleted
|
|
681
|
+
if last_value is VAR_DELETED:
|
|
682
|
+
return None
|
|
683
|
+
|
|
684
|
+
# Resolve object reference if applicable
|
|
685
|
+
if resolve_refs and is_obj_ref(last_value):
|
|
686
|
+
return self.get_obj(last_value['_obj_ref'])
|
|
687
|
+
|
|
688
|
+
return last_value
|
|
689
|
+
|
|
690
|
+
def is_var_deleted(self, key):
|
|
691
|
+
"""
|
|
692
|
+
Check if a variable is currently marked as deleted.
|
|
693
|
+
|
|
694
|
+
Args:
|
|
695
|
+
key: Variable name
|
|
696
|
+
|
|
697
|
+
Returns:
|
|
698
|
+
True if the variable exists and is deleted, False otherwise
|
|
699
|
+
"""
|
|
700
|
+
history = self.vars.get(key)
|
|
701
|
+
if not history:
|
|
702
|
+
return False
|
|
703
|
+
|
|
704
|
+
last_stamp, last_value = history[-1]
|
|
705
|
+
return last_value is VAR_DELETED
|
|
706
|
+
|
|
707
|
+
def get_all_vars(self, resolve_refs=True):
|
|
708
|
+
"""
|
|
709
|
+
Get a dictionary of all current non-deleted variable values.
|
|
710
|
+
|
|
711
|
+
Args:
|
|
712
|
+
resolve_refs: If True (default), resolve object references to actual data
|
|
713
|
+
|
|
714
|
+
Returns:
|
|
715
|
+
dict: Variable name → current value (excludes deleted variables)
|
|
716
|
+
"""
|
|
717
|
+
result = {}
|
|
718
|
+
for key, history in self.vars.items():
|
|
719
|
+
if history:
|
|
720
|
+
last_stamp, last_value = history[-1]
|
|
721
|
+
if last_value is not VAR_DELETED:
|
|
722
|
+
# Resolve object reference if applicable
|
|
723
|
+
if resolve_refs and is_obj_ref(last_value):
|
|
724
|
+
result[key] = self.get_obj(last_value['_obj_ref'])
|
|
725
|
+
else:
|
|
726
|
+
result[key] = last_value
|
|
727
|
+
return result
|
|
728
|
+
|
|
729
|
+
def get_var_history(self, key, resolve_refs=False):
|
|
730
|
+
"""
|
|
731
|
+
Get full history of a variable as list of [stamp, value] pairs.
|
|
732
|
+
Includes all historical values and deletion markers.
|
|
733
|
+
|
|
734
|
+
Args:
|
|
735
|
+
key: Variable name
|
|
736
|
+
resolve_refs: If True, resolve object references to actual data.
|
|
737
|
+
Default False to preserve the raw history structure.
|
|
738
|
+
|
|
739
|
+
Returns:
|
|
740
|
+
List of [stamp, value] pairs, or empty list if variable doesn't exist.
|
|
741
|
+
Deleted entries have VAR_DELETED as the value.
|
|
742
|
+
Object references appear as {'_obj_ref': stamp} unless resolve_refs=True.
|
|
743
|
+
"""
|
|
744
|
+
history = self.vars.get(key, [])
|
|
745
|
+
if not resolve_refs:
|
|
746
|
+
return list(history)
|
|
747
|
+
|
|
748
|
+
# Resolve object references
|
|
749
|
+
resolved = []
|
|
750
|
+
for stamp, value in history:
|
|
751
|
+
if is_obj_ref(value):
|
|
752
|
+
resolved.append([stamp, self.get_obj(value['_obj_ref'])])
|
|
753
|
+
else:
|
|
754
|
+
resolved.append([stamp, value])
|
|
755
|
+
return resolved
|
|
756
|
+
|
|
757
|
+
def get_var_desc(self, key):
|
|
758
|
+
"""
|
|
759
|
+
Get the current (latest) description of a variable.
|
|
760
|
+
|
|
761
|
+
Args:
|
|
762
|
+
key: Variable name
|
|
763
|
+
|
|
764
|
+
Returns:
|
|
765
|
+
Latest description string, or default message if no description exists
|
|
766
|
+
"""
|
|
767
|
+
desc = self._get_latest_desc(key)
|
|
768
|
+
return desc if desc else "No description found."
|
|
769
|
+
|
|
770
|
+
def get_var_desc_history(self, key):
|
|
771
|
+
"""
|
|
772
|
+
Get full history of a variable's descriptions as list of [stamp, description] pairs.
|
|
773
|
+
|
|
774
|
+
Args:
|
|
775
|
+
key: Variable name
|
|
776
|
+
|
|
777
|
+
Returns:
|
|
778
|
+
List of [stamp, description] pairs, or empty list if variable has no descriptions.
|
|
779
|
+
"""
|
|
780
|
+
return list(self.var_desc_history.get(key, []))
|
|
781
|
+
|
|
782
|
+
#--- Object Methods ---
|
|
783
|
+
|
|
784
|
+
    def set_obj(self, data, name=None, desc='', content_type='auto'):
        """
        Store a large object in compressed form.

        Objects are compressed using zlib and base64-encoded for JSON serialization.
        Optionally creates a variable reference to the stored object.

        Args:
            data: The data to store (bytes, str, or any JSON/pickle-serializable object)
            name: Optional variable name to create a reference
            desc: Description (used only if name is provided)
            content_type: 'bytes', 'text', 'json', 'pickle', or 'auto'

        Returns:
            str: The object stamp (ID)

        Example:
            # Store raw data, get stamp back
            stamp = memory.set_obj(large_text)

            # Store and create variable reference
            memory.set_obj(image_bytes, name='profile_pic', desc='User avatar')
            memory.get_var('profile_pic')  # Returns decompressed image_bytes
        """
        # Stamp is derived from a 50-char preview of the data's repr; the
        # full payload never enters the stamp computation.
        stamp = event_stamp({'obj': str(data)[:50]})

        # Compress and store (compress_to_json is a module-level helper)
        compressed_obj = compress_to_json(data, content_type)
        self.objects[stamp] = compressed_obj

        # Optionally create a variable reference
        if name:
            # The variable holds only a pointer to the object, never the data.
            obj_ref = {'_obj_ref': stamp}
            # Store reference directly in vars (bypassing size check);
            # note the var gets its own stamp, distinct from the object's.
            var_stamp = event_stamp({'var': name})

            # Initialize history if needed
            if name not in self.vars:
                self.vars[name] = []

            # Append [stamp, obj_ref] to history
            self.vars[name].append([var_stamp, obj_ref])

            # Track description changes separately (only when provided)
            if desc:
                if name not in self.var_desc_history:
                    self.var_desc_history[name] = []
                self.var_desc_history[name].append([var_stamp, desc])

            # Get latest description for the event (falls back to the most
            # recent previously-recorded description when desc is empty)
            current_desc = desc if desc else self._get_latest_desc(name)

            # Store the var event. str(datetime)[:23] trims to millisecond
            # precision ('YYYY-MM-DD HH:MM:SS.mmm' is 23 chars).
            # dtt / tz_bog / tz_utc are module-level datetime + tz helpers.
            var_event = {
                'type' : 'var',
                'stamp' : var_stamp,
                'var_name' : name,
                'var_value': obj_ref, # Store the reference, not the data
                'var_deleted': False,
                'var_desc' : current_desc,
                'content' : "Variable '{}' set to object ref: {}".format(name, stamp),
                'mode' : 'text',
                'dt_bog' : str(dtt.datetime.now(tz_bog))[:23],
                'dt_utc' : str(dtt.datetime.now(tz_utc))[:23],
            }
            self._store_event('var', var_event)

        # Always return the *object* stamp, even when a variable was created.
        return stamp
|
|
852
|
+
|
|
853
|
+
def get_obj(self, stamp):
|
|
854
|
+
"""
|
|
855
|
+
Retrieve and decompress an object by its stamp.
|
|
856
|
+
|
|
857
|
+
Args:
|
|
858
|
+
stamp: The object's event stamp
|
|
859
|
+
|
|
860
|
+
Returns:
|
|
861
|
+
The decompressed original data, or None if not found
|
|
862
|
+
|
|
863
|
+
Example:
|
|
864
|
+
data = memory.get_obj('A1B2C3...')
|
|
865
|
+
"""
|
|
866
|
+
obj_dict = self.objects.get(stamp)
|
|
867
|
+
if obj_dict is None:
|
|
868
|
+
return None
|
|
869
|
+
return decompress_from_json(obj_dict)
|
|
870
|
+
|
|
871
|
+
def get_obj_info(self, stamp):
|
|
872
|
+
"""
|
|
873
|
+
Get metadata about a stored object without decompressing it.
|
|
874
|
+
|
|
875
|
+
Args:
|
|
876
|
+
stamp: The object's event stamp
|
|
877
|
+
|
|
878
|
+
Returns:
|
|
879
|
+
dict with size_original, size_compressed, content_type, or None if not found
|
|
880
|
+
"""
|
|
881
|
+
obj_dict = self.objects.get(stamp)
|
|
882
|
+
if obj_dict is None:
|
|
883
|
+
return None
|
|
884
|
+
return {
|
|
885
|
+
'stamp': stamp,
|
|
886
|
+
'size_original': obj_dict['size_original'],
|
|
887
|
+
'size_compressed': obj_dict['size_compressed'],
|
|
888
|
+
'content_type': obj_dict['content_type'],
|
|
889
|
+
'compression_ratio': obj_dict['size_compressed'] / obj_dict['size_original'] if obj_dict['size_original'] > 0 else 0,
|
|
890
|
+
}
|
|
891
|
+
|
|
892
|
+
#---
|
|
893
|
+
|
|
894
|
+
def snapshot(self):
|
|
895
|
+
"""
|
|
896
|
+
Export memory state as dict.
|
|
897
|
+
Stores events and objects - indexes can be rehydrated from events.
|
|
898
|
+
|
|
899
|
+
Returns:
|
|
900
|
+
dict with 'id', 'events', and 'objects' keys
|
|
901
|
+
"""
|
|
902
|
+
return {
|
|
903
|
+
'id': self.id,
|
|
904
|
+
'events': dict(self.events), # All events by stamp
|
|
905
|
+
'objects': dict(self.objects), # All objects by stamp (already JSON-serializable)
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
def save(self, filename, compressed=False):
|
|
909
|
+
"""
|
|
910
|
+
Save memory to file.
|
|
911
|
+
|
|
912
|
+
Args:
|
|
913
|
+
filename: Path to save file
|
|
914
|
+
compressed: If True, use gzip compression
|
|
915
|
+
"""
|
|
916
|
+
import gzip
|
|
917
|
+
data = self.snapshot()
|
|
918
|
+
if compressed:
|
|
919
|
+
with gzip.open(filename, 'wb') as f:
|
|
920
|
+
pickle.dump(data, f)
|
|
921
|
+
else:
|
|
922
|
+
with open(filename, 'wb') as f:
|
|
923
|
+
pickle.dump(data, f)
|
|
924
|
+
|
|
925
|
+
def load(self, filename, compressed=False):
|
|
926
|
+
"""
|
|
927
|
+
Load memory from file by rehydrating from events.
|
|
928
|
+
|
|
929
|
+
Args:
|
|
930
|
+
filename: Path to load file
|
|
931
|
+
compressed: If True, expect gzip compression
|
|
932
|
+
"""
|
|
933
|
+
import gzip
|
|
934
|
+
if compressed:
|
|
935
|
+
with gzip.open(filename, 'rb') as f:
|
|
936
|
+
data = pickle.load(f)
|
|
937
|
+
else:
|
|
938
|
+
with open(filename, 'rb') as f:
|
|
939
|
+
data = pickle.load(f)
|
|
940
|
+
|
|
941
|
+
# Rehydrate from events (pass objects if present)
|
|
942
|
+
event_list = list(data.get('events', {}).values())
|
|
943
|
+
objects = data.get('objects', {})
|
|
944
|
+
mem = MEMORY.from_events(event_list, data.get('id'), objects=objects)
|
|
945
|
+
|
|
946
|
+
# Copy state to self
|
|
947
|
+
self.id = mem.id
|
|
948
|
+
self.events = mem.events
|
|
949
|
+
self.idx_msgs = mem.idx_msgs
|
|
950
|
+
self.idx_refs = mem.idx_refs
|
|
951
|
+
self.idx_logs = mem.idx_logs
|
|
952
|
+
self.idx_vars = mem.idx_vars
|
|
953
|
+
self.idx_all = mem.idx_all
|
|
954
|
+
self.vars = mem.vars
|
|
955
|
+
self.var_desc_history = mem.var_desc_history
|
|
956
|
+
self.objects = mem.objects
|
|
957
|
+
|
|
958
|
+
def copy(self):
|
|
959
|
+
"""Return a deep copy of the MEMORY instance."""
|
|
960
|
+
return copy.deepcopy(self)
|
|
961
|
+
|
|
962
|
+
def to_json(self, filename=None, indent=2):
|
|
963
|
+
"""
|
|
964
|
+
Export memory to JSON format.
|
|
965
|
+
|
|
966
|
+
Like DataFrame.to_csv(), this allows saving memory state to a portable
|
|
967
|
+
JSON format that can be loaded later with from_json().
|
|
968
|
+
|
|
969
|
+
Args:
|
|
970
|
+
filename: If provided, write to file. Otherwise return JSON string.
|
|
971
|
+
indent: JSON indentation level (default 2, use None for compact)
|
|
972
|
+
|
|
973
|
+
Returns:
|
|
974
|
+
JSON string if filename is None, else None
|
|
975
|
+
|
|
976
|
+
Example:
|
|
977
|
+
# Save to file
|
|
978
|
+
memory.to_json('memory_backup.json')
|
|
979
|
+
|
|
980
|
+
# Get JSON string
|
|
981
|
+
json_str = memory.to_json()
|
|
982
|
+
"""
|
|
983
|
+
# Prepare data for JSON serialization
|
|
984
|
+
# Need to handle VAR_DELETED sentinel in vars history
|
|
985
|
+
def serialize_var_history(var_dict):
|
|
986
|
+
"""Convert VAR_DELETED sentinel to JSON-safe marker."""
|
|
987
|
+
result = {}
|
|
988
|
+
for key, history in var_dict.items():
|
|
989
|
+
serialized_history = []
|
|
990
|
+
for stamp, value in history:
|
|
991
|
+
if value is VAR_DELETED:
|
|
992
|
+
serialized_history.append([stamp, '__VAR_DELETED__'])
|
|
993
|
+
else:
|
|
994
|
+
serialized_history.append([stamp, value])
|
|
995
|
+
result[key] = serialized_history
|
|
996
|
+
return result
|
|
997
|
+
|
|
998
|
+
data = {
|
|
999
|
+
'version': '1.0',
|
|
1000
|
+
'id': self.id,
|
|
1001
|
+
'events': self.events,
|
|
1002
|
+
'objects': self.objects,
|
|
1003
|
+
'vars': serialize_var_history(self.vars),
|
|
1004
|
+
'var_desc_history': self.var_desc_history,
|
|
1005
|
+
'idx_msgs': self.idx_msgs,
|
|
1006
|
+
'idx_refs': self.idx_refs,
|
|
1007
|
+
'idx_logs': self.idx_logs,
|
|
1008
|
+
'idx_vars': self.idx_vars,
|
|
1009
|
+
'idx_all': self.idx_all,
|
|
1010
|
+
}
|
|
1011
|
+
|
|
1012
|
+
json_str = json.dumps(data, indent=indent, ensure_ascii=False)
|
|
1013
|
+
|
|
1014
|
+
if filename:
|
|
1015
|
+
with open(filename, 'w', encoding='utf-8') as f:
|
|
1016
|
+
f.write(json_str)
|
|
1017
|
+
return None
|
|
1018
|
+
return json_str
|
|
1019
|
+
|
|
1020
|
+
@classmethod
|
|
1021
|
+
def from_json(cls, source):
|
|
1022
|
+
"""
|
|
1023
|
+
Create MEMORY instance from JSON.
|
|
1024
|
+
|
|
1025
|
+
Like DataFrame.read_csv(), this loads a memory from a JSON file or string
|
|
1026
|
+
that was saved with to_json().
|
|
1027
|
+
|
|
1028
|
+
Args:
|
|
1029
|
+
source: JSON string or filename path
|
|
1030
|
+
|
|
1031
|
+
Returns:
|
|
1032
|
+
New MEMORY instance
|
|
1033
|
+
|
|
1034
|
+
Example:
|
|
1035
|
+
# Load from file
|
|
1036
|
+
memory = MEMORY.from_json('memory_backup.json')
|
|
1037
|
+
|
|
1038
|
+
# Load from JSON string
|
|
1039
|
+
memory = MEMORY.from_json(json_str)
|
|
1040
|
+
"""
|
|
1041
|
+
import os
|
|
1042
|
+
|
|
1043
|
+
# Determine if source is a file or JSON string
|
|
1044
|
+
if os.path.isfile(source):
|
|
1045
|
+
with open(source, 'r', encoding='utf-8') as f:
|
|
1046
|
+
data = json.load(f)
|
|
1047
|
+
else:
|
|
1048
|
+
data = json.loads(source)
|
|
1049
|
+
|
|
1050
|
+
# Helper to restore VAR_DELETED sentinel
|
|
1051
|
+
def deserialize_var_history(var_dict):
|
|
1052
|
+
"""Convert JSON marker back to VAR_DELETED sentinel."""
|
|
1053
|
+
result = {}
|
|
1054
|
+
for key, history in var_dict.items():
|
|
1055
|
+
deserialized_history = []
|
|
1056
|
+
for stamp, value in history:
|
|
1057
|
+
if value == '__VAR_DELETED__':
|
|
1058
|
+
deserialized_history.append([stamp, VAR_DELETED])
|
|
1059
|
+
else:
|
|
1060
|
+
deserialized_history.append([stamp, value])
|
|
1061
|
+
result[key] = deserialized_history
|
|
1062
|
+
return result
|
|
1063
|
+
|
|
1064
|
+
# Create new instance
|
|
1065
|
+
mem = cls()
|
|
1066
|
+
mem.id = data.get('id', mem.id)
|
|
1067
|
+
mem.events = data.get('events', {})
|
|
1068
|
+
mem.objects = data.get('objects', {})
|
|
1069
|
+
mem.vars = deserialize_var_history(data.get('vars', {}))
|
|
1070
|
+
mem.var_desc_history = data.get('var_desc_history', {})
|
|
1071
|
+
mem.idx_msgs = data.get('idx_msgs', [])
|
|
1072
|
+
mem.idx_refs = data.get('idx_refs', [])
|
|
1073
|
+
mem.idx_logs = data.get('idx_logs', [])
|
|
1074
|
+
mem.idx_vars = data.get('idx_vars', [])
|
|
1075
|
+
mem.idx_all = data.get('idx_all', [])
|
|
1076
|
+
|
|
1077
|
+
return mem
|
|
1078
|
+
|
|
1079
|
+
    @classmethod
    def from_events(cls, event_list, memory_id=None, objects=None):
        """
        Rehydrate a MEMORY instance from a list of events.

        This is the inverse of snapshot - enables cloud sync.

        Args:
            event_list: List of event dicts (order doesn't matter, will be sorted)
            memory_id: Optional ID for the memory instance
            objects: Optional dict of objects (stamp → compressed object dict)

        Returns:
            New MEMORY instance with all events loaded
        """
        mem = cls()
        if memory_id:
            mem.id = memory_id

        # Restore objects if provided (shallow copy; objects are already
        # in their compressed, JSON-ready form)
        if objects:
            mem.objects = dict(objects)

        # Sort events by timestamp (dt_utc) for chronological order;
        # events missing dt_utc sort first (empty-string key)
        sorted_events = sorted(event_list, key=lambda e: e.get('dt_utc', ''))

        for ev in sorted_events:
            stamp = ev.get('stamp')
            timestamp = ev.get('dt_utc', '')
            # Events without a stamp are skipped entirely (never stored,
            # never indexed — not even in idx_all)
            if not stamp:
                continue

            event_type = ev.get('type', 'msg')

            # Store in data layer
            mem.events[stamp] = ev

            # Create [timestamp, stamp] pair for indexes
            ts_pair = [timestamp, stamp]

            # Add to appropriate index (direct append since already sorted by timestamp)
            if event_type == 'msg':
                mem.idx_msgs.append(ts_pair)
            elif event_type == 'ref':
                mem.idx_refs.append(ts_pair)
            elif event_type == 'log':
                mem.idx_logs.append(ts_pair)
            elif event_type == 'var':
                mem.idx_vars.append(ts_pair)
                # Replay variable state into history list
                var_name = ev.get('var_name')
                if var_name:
                    # Initialize history list if needed
                    if var_name not in mem.vars:
                        mem.vars[var_name] = []

                    # Determine value (check for deletion marker);
                    # deletions are recorded as the VAR_DELETED sentinel
                    if ev.get('var_deleted', False):
                        value = VAR_DELETED
                    else:
                        value = ev.get('var_value')

                    # Append to history (keyed by the event's own stamp,
                    # not the timestamp)
                    mem.vars[var_name].append([stamp, value])

                    # Rebuild description history if present
                    var_desc = ev.get('var_desc')
                    if var_desc:
                        if var_name not in mem.var_desc_history:
                            mem.var_desc_history[var_name] = []
                        # Only add if different from last description (avoid duplicates,
                        # since every var event carries the then-current description)
                        desc_hist = mem.var_desc_history[var_name]
                        if not desc_hist or desc_hist[-1][1] != var_desc:
                            desc_hist.append([stamp, var_desc])

            # Master index receives every stamped event regardless of type
            mem.idx_all.append(ts_pair)

        return mem
|
|
1156
|
+
|
|
1157
|
+
#---
|
|
1158
|
+
|
|
1159
|
+
# The render method provides a flexible way to display or export the MEMORY's messages or events.
|
|
1160
|
+
# It supports event type selection, output format, advanced filtering, metadata inclusion, pretty-printing, and message condensing.
|
|
1161
|
+
def render(
|
|
1162
|
+
self,
|
|
1163
|
+
include=('msgs',), # Tuple/list of event types to include: 'msgs', 'logs', 'refs', 'vars', 'events'
|
|
1164
|
+
output_format='plain', # 'plain', 'markdown', 'json', 'table', 'conversation'
|
|
1165
|
+
role_filter=None, # List of roles to include (None = all)
|
|
1166
|
+
mode_filter=None, # List of modes to include (None = all)
|
|
1167
|
+
channel_filter=None, # Channel to filter by (None = all)
|
|
1168
|
+
content_filter=None, # String or list of keywords to filter content (None = all)
|
|
1169
|
+
include_metadata=True, # Whether to include metadata (timestamps, roles, etc.)
|
|
1170
|
+
pretty=True, # Pretty-print for human readability
|
|
1171
|
+
max_length=None, # Max total length of output (int, None = unlimited)
|
|
1172
|
+
condense_msg=True, # If True, snip/condense messages that exceed max_length
|
|
1173
|
+
time_range=None, # Tuple (start_dt, end_dt) to filter by datetime (None = all)
|
|
1174
|
+
event_limit=None, # Max number of events to include (None = all)
|
|
1175
|
+
# Conversation/LLM-optimized options:
|
|
1176
|
+
max_message_length=1000, # Max length per individual message (for 'conversation' format)
|
|
1177
|
+
max_total_length=8000, # Max total length of the entire conversation (for 'conversation' format)
|
|
1178
|
+
include_roles=('user', 'assistant'), # Which roles to include (for 'conversation' format)
|
|
1179
|
+
message_separator="\n\n", # Separator between messages (for 'conversation' format)
|
|
1180
|
+
role_prefix=True, # Whether to include role prefixes like "User:" and "Assistant:" (for 'conversation' format)
|
|
1181
|
+
truncate_indicator="...", # What to show when content is truncated (for 'conversation' format)
|
|
1182
|
+
):
|
|
1183
|
+
"""
|
|
1184
|
+
Render MEMORY contents with flexible filtering and formatting.
|
|
1185
|
+
|
|
1186
|
+
This method unifies all rendering and export logic, including:
|
|
1187
|
+
- General event/message rendering (plain, markdown, table, json)
|
|
1188
|
+
- Advanced filtering (by role, mode, channel, content, time, event type)
|
|
1189
|
+
- Metadata inclusion and pretty-printing
|
|
1190
|
+
- Output length limiting and message condensing/snipping
|
|
1191
|
+
- LLM-optimized conversation export (via output_format='conversation'),
|
|
1192
|
+
which produces a clean text blob of user/assistant messages with
|
|
1193
|
+
configurable length and formatting options.
|
|
1194
|
+
|
|
1195
|
+
Args:
|
|
1196
|
+
include: Which event types to include ('msgs', 'logs', 'refs', 'vars', 'events')
|
|
1197
|
+
output_format: 'plain', 'markdown', 'json', 'table', or 'conversation'
|
|
1198
|
+
role_filter: List of roles to include (None = all)
|
|
1199
|
+
mode_filter: List of modes to include (None = all)
|
|
1200
|
+
channel_filter: Channel to filter by (None = all)
|
|
1201
|
+
content_filter: String or list of keywords to filter content (None = all)
|
|
1202
|
+
include_metadata: Whether to include metadata (timestamps, roles, etc.)
|
|
1203
|
+
pretty: Pretty-print for human readability
|
|
1204
|
+
max_length: Max total length of output (for general formats)
|
|
1205
|
+
condense_msg: If True, snip/condense messages that exceed max_length
|
|
1206
|
+
time_range: Tuple (start_dt, end_dt) to filter by datetime (None = all)
|
|
1207
|
+
event_limit: Max number of events to include (None = all)
|
|
1208
|
+
max_message_length: Max length per message (for 'conversation' format)
|
|
1209
|
+
max_total_length: Max total length (for 'conversation' format)
|
|
1210
|
+
include_roles: Which roles to include (for 'conversation' format)
|
|
1211
|
+
message_separator: Separator between messages (for 'conversation' format)
|
|
1212
|
+
role_prefix: Whether to include role prefixes (for 'conversation' format)
|
|
1213
|
+
truncate_indicator: Indicator for truncated content (for 'conversation' format)
|
|
1214
|
+
|
|
1215
|
+
Returns:
|
|
1216
|
+
str or dict: Rendered output in the specified format.
|
|
1217
|
+
|
|
1218
|
+
Example usage:
|
|
1219
|
+
mem = MEMORY()
|
|
1220
|
+
mem.add_msg('user', 'Hello!')
|
|
1221
|
+
mem.add_msg('assistant', 'Hi there!')
|
|
1222
|
+
print(mem.render()) # Default: plain text, all messages
|
|
1223
|
+
|
|
1224
|
+
# Render only user messages in markdown
|
|
1225
|
+
print(mem.render(role_filter=['user'], output_format='markdown'))
|
|
1226
|
+
|
|
1227
|
+
# Render as a table, including logs and refs
|
|
1228
|
+
print(mem.render(include=('msgs', 'logs', 'refs'), output_format='table'))
|
|
1229
|
+
|
|
1230
|
+
# Render with a content keyword filter and max length
|
|
1231
|
+
print(mem.render(content_filter='hello', max_length=50))
|
|
1232
|
+
|
|
1233
|
+
# Export as LLM-optimized conversation
|
|
1234
|
+
print(mem.render(output_format='conversation', max_total_length=2000))
|
|
1235
|
+
|
|
1236
|
+
# Filter by channel
|
|
1237
|
+
print(mem.render(channel_filter='telegram'))
|
|
1238
|
+
"""
|
|
1239
|
+
from datetime import datetime
|
|
1240
|
+
|
|
1241
|
+
# Helper: flatten include to set for fast lookup
|
|
1242
|
+
include_set = set(include)
|
|
1243
|
+
|
|
1244
|
+
# Helper: filter events by type using the new index-based retrieval
|
|
1245
|
+
def filter_events():
|
|
1246
|
+
events = []
|
|
1247
|
+
if 'events' in include_set:
|
|
1248
|
+
# Include all events from master index
|
|
1249
|
+
events = self._get_events_from_index(self.idx_all, -1)
|
|
1250
|
+
else:
|
|
1251
|
+
# Selectively include types
|
|
1252
|
+
if 'msgs' in include_set:
|
|
1253
|
+
events.extend(self._get_events_from_index(self.idx_msgs, -1))
|
|
1254
|
+
if 'logs' in include_set:
|
|
1255
|
+
events.extend(self._get_events_from_index(self.idx_logs, -1))
|
|
1256
|
+
if 'refs' in include_set:
|
|
1257
|
+
events.extend(self._get_events_from_index(self.idx_refs, -1))
|
|
1258
|
+
if 'vars' in include_set:
|
|
1259
|
+
events.extend(self._get_events_from_index(self.idx_vars, -1))
|
|
1260
|
+
return events
|
|
1261
|
+
|
|
1262
|
+
# Helper: filter by role, mode, channel, content, and time
|
|
1263
|
+
def advanced_filter(evlist):
|
|
1264
|
+
filtered = []
|
|
1265
|
+
for ev in evlist:
|
|
1266
|
+
# Role filter
|
|
1267
|
+
if role_filter:
|
|
1268
|
+
ev_role = ev.get('role') or ev.get('type')
|
|
1269
|
+
if ev_role not in role_filter:
|
|
1270
|
+
continue
|
|
1271
|
+
# Mode filter
|
|
1272
|
+
if mode_filter and ev.get('mode') not in mode_filter:
|
|
1273
|
+
continue
|
|
1274
|
+
# Channel filter
|
|
1275
|
+
if channel_filter and ev.get('channel') != channel_filter:
|
|
1276
|
+
continue
|
|
1277
|
+
# Content filter
|
|
1278
|
+
if content_filter:
|
|
1279
|
+
content = ev.get('content', '')
|
|
1280
|
+
if isinstance(content_filter, str):
|
|
1281
|
+
if content_filter.lower() not in content.lower():
|
|
1282
|
+
continue
|
|
1283
|
+
else: # list of keywords
|
|
1284
|
+
if not any(kw.lower() in content.lower() for kw in content_filter):
|
|
1285
|
+
continue
|
|
1286
|
+
# Time filter
|
|
1287
|
+
if time_range:
|
|
1288
|
+
# Try to get timestamp from event
|
|
1289
|
+
dt_str = ev.get('dt_utc') or ev.get('dt_bog')
|
|
1290
|
+
if dt_str:
|
|
1291
|
+
try:
|
|
1292
|
+
dt = datetime.fromisoformat(dt_str)
|
|
1293
|
+
start, end = time_range
|
|
1294
|
+
if (start and dt < start) or (end and dt > end):
|
|
1295
|
+
continue
|
|
1296
|
+
except Exception:
|
|
1297
|
+
pass # Ignore if can't parse
|
|
1298
|
+
filtered.append(ev)
|
|
1299
|
+
return filtered
|
|
1300
|
+
|
|
1301
|
+
# Helper: sort events by stamp (alphabetical = chronological)
|
|
1302
|
+
def sort_events(evlist):
|
|
1303
|
+
return sorted(evlist, key=lambda ev: ev.get('stamp', ''))
|
|
1304
|
+
|
|
1305
|
+
# Step 1: Gather and filter events
|
|
1306
|
+
events = filter_events()
|
|
1307
|
+
events = advanced_filter(events)
|
|
1308
|
+
events = sort_events(events)
|
|
1309
|
+
if event_limit:
|
|
1310
|
+
events = events[-event_limit:] # Most recent N
|
|
1311
|
+
|
|
1312
|
+
# --- Conversation/LLM-optimized format ---
|
|
1313
|
+
if output_format == 'conversation':
|
|
1314
|
+
# Only include messages and filter by include_roles
|
|
1315
|
+
conv_msgs = [ev for ev in events if ev.get('role') in include_roles]
|
|
1316
|
+
# Already sorted by stamp
|
|
1317
|
+
|
|
1318
|
+
conversation_parts = []
|
|
1319
|
+
current_length = 0
|
|
1320
|
+
for msg in conv_msgs:
|
|
1321
|
+
role = msg.get('role', 'unknown')
|
|
1322
|
+
content = msg.get('content', '')
|
|
1323
|
+
|
|
1324
|
+
# Truncate individual message if needed
|
|
1325
|
+
if len(content) > max_message_length:
|
|
1326
|
+
content = content[:max_message_length - len(truncate_indicator)] + truncate_indicator
|
|
1327
|
+
|
|
1328
|
+
# Format the message
|
|
1329
|
+
if role_prefix:
|
|
1330
|
+
if role == 'user':
|
|
1331
|
+
formatted_msg = "User: " + content
|
|
1332
|
+
elif role == 'assistant':
|
|
1333
|
+
formatted_msg = "Assistant: " + content
|
|
1334
|
+
else:
|
|
1335
|
+
formatted_msg = role.title() + ": " + content
|
|
1336
|
+
else:
|
|
1337
|
+
formatted_msg = content
|
|
1338
|
+
|
|
1339
|
+
# Check if adding this message would exceed total length
|
|
1340
|
+
message_length = len(formatted_msg) + len(message_separator)
|
|
1341
|
+
if current_length + message_length > max_total_length:
|
|
1342
|
+
# If we can't fit the full message, try to fit a truncated version
|
|
1343
|
+
remaining_space = max_total_length - current_length - len(truncate_indicator)
|
|
1344
|
+
if remaining_space > 50: # Only add if there's reasonable space
|
|
1345
|
+
if role_prefix:
|
|
1346
|
+
prefix_len = len(role.title() + ": ")
|
|
1347
|
+
truncated_content = content[:remaining_space - prefix_len] + truncate_indicator
|
|
1348
|
+
formatted_msg = role.title() + ": " + truncated_content
|
|
1349
|
+
else:
|
|
1350
|
+
formatted_msg = content[:remaining_space] + truncate_indicator
|
|
1351
|
+
conversation_parts.append(formatted_msg)
|
|
1352
|
+
break
|
|
1353
|
+
|
|
1354
|
+
conversation_parts.append(formatted_msg)
|
|
1355
|
+
current_length += message_length
|
|
1356
|
+
|
|
1357
|
+
return message_separator.join(conversation_parts)
|
|
1358
|
+
|
|
1359
|
+
# --- JSON format ---
|
|
1360
|
+
output = None
|
|
1361
|
+
total_length = 0
|
|
1362
|
+
snip_notice = " [snipped]" # For snipped messages
|
|
1363
|
+
|
|
1364
|
+
if output_format == 'json':
|
|
1365
|
+
# Output as JSON (list of dicts)
|
|
1366
|
+
if not include_metadata:
|
|
1367
|
+
# Remove metadata fields
|
|
1368
|
+
def strip_meta(ev):
|
|
1369
|
+
return {k: v for k, v in ev.items() if k in ('role', 'content', 'type', 'channel')}
|
|
1370
|
+
out_events = [strip_meta(ev) for ev in events]
|
|
1371
|
+
else:
|
|
1372
|
+
out_events = events
|
|
1373
|
+
output = json.dumps(out_events, indent=2 if pretty else None, default=str)
|
|
1374
|
+
if max_length and len(output) > max_length:
|
|
1375
|
+
output = output[:max_length] + snip_notice
|
|
1376
|
+
|
|
1377
|
+
elif output_format in ('plain', 'markdown', 'table'):
|
|
1378
|
+
# Build lines for each event
|
|
1379
|
+
lines = []
|
|
1380
|
+
for ev in events:
|
|
1381
|
+
# Compose line based on event type
|
|
1382
|
+
event_type = ev.get('type', 'msg')
|
|
1383
|
+
if event_type == 'log' or ev.get('role') == 'logger':
|
|
1384
|
+
prefix = "[LOG]"
|
|
1385
|
+
content = ev.get('content', '')
|
|
1386
|
+
elif event_type == 'ref':
|
|
1387
|
+
prefix = "[REF]"
|
|
1388
|
+
content = ev.get('content', '')
|
|
1389
|
+
elif event_type == 'var':
|
|
1390
|
+
prefix = "[VAR]"
|
|
1391
|
+
content = "{} = {}".format(ev.get('var_name', '?'), ev.get('var_value', '?'))
|
|
1392
|
+
else:
|
|
1393
|
+
prefix = "[{}]".format(ev.get('role', 'MSG').upper())
|
|
1394
|
+
content = ev.get('content', '')
|
|
1395
|
+
|
|
1396
|
+
# Optionally include metadata
|
|
1397
|
+
meta = ""
|
|
1398
|
+
if include_metadata:
|
|
1399
|
+
dt = ev.get('dt_utc') or ev.get('dt_bog')
|
|
1400
|
+
stamp = ev.get('stamp', '')
|
|
1401
|
+
channel = ev.get('channel', '')
|
|
1402
|
+
meta = " ({})".format(dt) if dt else ""
|
|
1403
|
+
if output_format == 'table':
|
|
1404
|
+
meta = "\t{}\t{}\t{}".format(dt or '', stamp or '', channel or '')
|
|
1405
|
+
|
|
1406
|
+
# Condense message if needed
|
|
1407
|
+
line = "{} {}{}".format(prefix, content, meta)
|
|
1408
|
+
if max_length and total_length + len(line) > max_length:
|
|
1409
|
+
if condense_msg:
|
|
1410
|
+
# Snip the content to fit
|
|
1411
|
+
allowed = max_length - total_length - len(snip_notice)
|
|
1412
|
+
if allowed > 0:
|
|
1413
|
+
line = line[:allowed] + snip_notice
|
|
1414
|
+
else:
|
|
1415
|
+
line = snip_notice
|
|
1416
|
+
lines.append(line)
|
|
1417
|
+
break
|
|
1418
|
+
else:
|
|
1419
|
+
break
|
|
1420
|
+
lines.append(line)
|
|
1421
|
+
total_length += len(line) + 1 # +1 for newline
|
|
1422
|
+
|
|
1423
|
+
# Format as table if requested
|
|
1424
|
+
if output_format == 'table':
|
|
1425
|
+
# Table header
|
|
1426
|
+
header = "Type\tContent\tDatetime\tStamp\tChannel"
|
|
1427
|
+
table_lines = [header]
|
|
1428
|
+
for ev in events:
|
|
1429
|
+
typ = ev.get('type', ev.get('role', ''))
|
|
1430
|
+
if typ == 'var':
|
|
1431
|
+
content = "{} = {}".format(ev.get('var_name', '?'), ev.get('var_value', '?'))
|
|
1432
|
+
else:
|
|
1433
|
+
content = ev.get('content', '')
|
|
1434
|
+
dt = ev.get('dt_utc') or ev.get('dt_bog') or ''
|
|
1435
|
+
stamp = ev.get('stamp', '')
|
|
1436
|
+
channel = ev.get('channel', '')
|
|
1437
|
+
row = "{}\t{}\t{}\t{}\t{}".format(typ, content, dt, stamp, channel)
|
|
1438
|
+
table_lines.append(row)
|
|
1439
|
+
output = "\n".join(table_lines)
|
|
1440
|
+
else:
|
|
1441
|
+
sep = "\n" if pretty else " "
|
|
1442
|
+
output = sep.join(lines)
|
|
1443
|
+
|
|
1444
|
+
else:
|
|
1445
|
+
raise ValueError("Unknown output_format: {}".format(output_format))
|
|
1446
|
+
|
|
1447
|
+
return output
|
|
1448
|
+
|
|
1449
|
+
|
|
1450
|
+
MemoryManipulationExamples = """
|
|
1451
|
+
|
|
1452
|
+
MEMORY Class Usage Tutorial
|
|
1453
|
+
===========================
|
|
1454
|
+
|
|
1455
|
+
This tutorial demonstrates common workflows and transactions using the MEMORY class.
|
|
1456
|
+
The MEMORY class is an event-sourced state container for managing messages, logs,
|
|
1457
|
+
reflections, and variables in agentic or conversational systems.
|
|
1458
|
+
|
|
1459
|
+
Key Features:
|
|
1460
|
+
- Everything is an event with a sortable ID (alphabetical = chronological)
|
|
1461
|
+
- Events stored in a dictionary for O(1) lookup
|
|
1462
|
+
- Channel tracking for messages (omni-directional communication)
|
|
1463
|
+
- Full variable history with timestamps
|
|
1464
|
+
- Memory can be rehydrated from event list for cloud sync
|
|
1465
|
+
|
|
1466
|
+
------------------------------------------------------------
|
|
1467
|
+
1. Initialization
|
|
1468
|
+
------------------------------------------------------------
|
|
1469
|
+
|
|
1470
|
+
>>> mem = MEMORY()
|
|
1471
|
+
|
|
1472
|
+
Creates a new MEMORY instance with empty event stores and indexes.
|
|
1473
|
+
|
|
1474
|
+
------------------------------------------------------------
|
|
1475
|
+
2. Adding and Retrieving Messages with Channel Support
|
|
1476
|
+
------------------------------------------------------------
|
|
1477
|
+
|
|
1478
|
+
# Add user and assistant messages with channel tracking
|
|
1479
|
+
>>> mem.add_msg('user', 'Hello, assistant!', channel='webapp')
|
|
1480
|
+
>>> mem.add_msg('assistant', 'Hello, user! How can I help you?', channel='webapp')
|
|
1481
|
+
|
|
1482
|
+
# Messages from different channels
|
|
1483
|
+
>>> mem.add_msg('user', 'Quick question via phone', channel='ios')
|
|
1484
|
+
>>> mem.add_msg('user', 'Following up on Telegram', channel='telegram')
|
|
1485
|
+
|
|
1486
|
+
# Retrieve all messages as a list of dicts
|
|
1487
|
+
>>> mem.get_msgs()
|
|
1488
|
+
[{'role': 'user', 'content': 'Hello, assistant!', 'channel': 'webapp', ...}, ...]
|
|
1489
|
+
|
|
1490
|
+
# Filter messages by channel
|
|
1491
|
+
>>> mem.get_msgs(channel='telegram')
|
|
1492
|
+
|
|
1493
|
+
# Retrieve only user messages as a string
|
|
1494
|
+
>>> mem.get_msgs(include=['user'], repr='str')
|
|
1495
|
+
'user: Hello, assistant!'
|
|
1496
|
+
|
|
1497
|
+
# Get the last assistant message
|
|
1498
|
+
>>> mem.last_asst_msg()
|
|
1499
|
+
'Hello, user! How can I help you?'
|
|
1500
|
+
|
|
1501
|
+
------------------------------------------------------------
|
|
1502
|
+
3. Logging and Reflections
|
|
1503
|
+
------------------------------------------------------------
|
|
1504
|
+
|
|
1505
|
+
# Add a log entry
|
|
1506
|
+
>>> mem.add_log('System initialized.')
|
|
1507
|
+
|
|
1508
|
+
# Add a reflection (agent's internal reasoning)
|
|
1509
|
+
>>> mem.add_ref('User seems to be asking about weather patterns.')
|
|
1510
|
+
|
|
1511
|
+
# Retrieve the last log message
|
|
1512
|
+
>>> mem.last_log_msg()
|
|
1513
|
+
'System initialized.'
|
|
1514
|
+
|
|
1515
|
+
# Get all logs
|
|
1516
|
+
>>> mem.get_logs()
|
|
1517
|
+
|
|
1518
|
+
# Get all reflections
|
|
1519
|
+
>>> mem.get_refs()
|
|
1520
|
+
|
|
1521
|
+
------------------------------------------------------------
|
|
1522
|
+
4. Managing Variables (Full History Tracking)
|
|
1523
|
+
------------------------------------------------------------
|
|
1524
|
+
|
|
1525
|
+
# Set a variable with a description (logged as an event!)
|
|
1526
|
+
>>> mem.set_var('session_id', 'abc123', desc='Current session identifier')
|
|
1527
|
+
|
|
1528
|
+
# Update the variable (appends to history, doesn't overwrite)
|
|
1529
|
+
>>> mem.set_var('session_id', 'xyz789')
|
|
1530
|
+
|
|
1531
|
+
# Retrieve the current value of a variable
|
|
1532
|
+
>>> mem.get_var('session_id')
|
|
1533
|
+
'xyz789'
|
|
1534
|
+
|
|
1535
|
+
# Get all current non-deleted variables as a dict
|
|
1536
|
+
>>> mem.get_all_vars()
|
|
1537
|
+
{'session_id': 'xyz789'}
|
|
1538
|
+
|
|
1539
|
+
# Get full variable history as list of [stamp, value] pairs
|
|
1540
|
+
>>> mem.get_var_history('session_id')
|
|
1541
|
+
[['stamp1...', 'abc123'], ['stamp2...', 'xyz789']]
|
|
1542
|
+
|
|
1543
|
+
# Get variable description
|
|
1544
|
+
>>> mem.get_var_desc('session_id')
|
|
1545
|
+
'Current session identifier'
|
|
1546
|
+
|
|
1547
|
+
# Delete a variable (marks as deleted but preserves history)
|
|
1548
|
+
>>> mem.del_var('session_id')
|
|
1549
|
+
|
|
1550
|
+
# After deletion, get_var returns None
|
|
1551
|
+
>>> mem.get_var('session_id')
|
|
1552
|
+
None
|
|
1553
|
+
|
|
1554
|
+
# Check if a variable is deleted
|
|
1555
|
+
>>> mem.is_var_deleted('session_id')
|
|
1556
|
+
True
|
|
1557
|
+
|
|
1558
|
+
# History still shows all changes including deletion
|
|
1559
|
+
>>> mem.get_var_history('session_id')
|
|
1560
|
+
[['stamp1...', 'abc123'], ['stamp2...', 'xyz789'], ['stamp3...', <DELETED>]]
|
|
1561
|
+
|
|
1562
|
+
# Variable can be re-set after deletion
|
|
1563
|
+
>>> mem.set_var('session_id', 'new_value')
|
|
1564
|
+
>>> mem.get_var('session_id')
|
|
1565
|
+
'new_value'
|
|
1566
|
+
|
|
1567
|
+
------------------------------------------------------------
|
|
1568
|
+
5. Saving, Loading, and Copying State
|
|
1569
|
+
------------------------------------------------------------
|
|
1570
|
+
|
|
1571
|
+
# Save MEMORY state to a file
|
|
1572
|
+
>>> mem.save('memory_state.pkl')
|
|
1573
|
+
|
|
1574
|
+
# Save with compression
|
|
1575
|
+
>>> mem.save('memory_state.pkl.gz', compressed=True)
|
|
1576
|
+
|
|
1577
|
+
# Load MEMORY state from a file (rehydrates from events)
|
|
1578
|
+
>>> mem2 = MEMORY()
|
|
1579
|
+
>>> mem2.load('memory_state.pkl')
|
|
1580
|
+
|
|
1581
|
+
# Deep copy the MEMORY object
|
|
1582
|
+
>>> mem3 = mem.copy()
|
|
1583
|
+
|
|
1584
|
+
------------------------------------------------------------
|
|
1585
|
+
6. Rehydrating from Events (Cloud Sync Ready)
|
|
1586
|
+
------------------------------------------------------------
|
|
1587
|
+
|
|
1588
|
+
# Export all events
|
|
1589
|
+
>>> events = mem.get_events()
|
|
1590
|
+
|
|
1591
|
+
# Create a new memory from events (order doesn't matter, sorted by stamp)
|
|
1592
|
+
>>> mem_copy = MEMORY.from_events(events)
|
|
1593
|
+
|
|
1594
|
+
# Export snapshot for cloud storage
|
|
1595
|
+
>>> snapshot = mem.snapshot()
|
|
1596
|
+
# snapshot = {'id': '...', 'events': {...}}
|
|
1597
|
+
|
|
1598
|
+
------------------------------------------------------------
|
|
1599
|
+
7. Rendering and Exporting Memory Contents
|
|
1600
|
+
------------------------------------------------------------
|
|
1601
|
+
|
|
1602
|
+
# Render all messages as plain text (default)
|
|
1603
|
+
>>> print(mem.render())
|
|
1604
|
+
|
|
1605
|
+
# Render only user messages in markdown format
|
|
1606
|
+
>>> print(mem.render(role_filter=['user'], output_format='markdown'))
|
|
1607
|
+
|
|
1608
|
+
# Render as a table, including logs and reflections
|
|
1609
|
+
>>> print(mem.render(include=('msgs', 'logs', 'refs'), output_format='table'))
|
|
1610
|
+
|
|
1611
|
+
# Filter by channel
|
|
1612
|
+
>>> print(mem.render(channel_filter='telegram'))
|
|
1613
|
+
|
|
1614
|
+
# Render with a content keyword filter and max length
|
|
1615
|
+
>>> print(mem.render(content_filter='hello', max_length=50))
|
|
1616
|
+
|
|
1617
|
+
# Export as LLM-optimized conversation (for prompt construction)
|
|
1618
|
+
>>> print(mem.render(output_format='conversation', max_total_length=2000))
|
|
1619
|
+
|
|
1620
|
+
------------------------------------------------------------
|
|
1621
|
+
8. Advanced Filtering and Formatting
|
|
1622
|
+
------------------------------------------------------------
|
|
1623
|
+
|
|
1624
|
+
# Filter by role, mode, and channel
|
|
1625
|
+
>>> print(mem.render(role_filter=['assistant'], mode_filter=['text'], channel_filter='webapp'))
|
|
1626
|
+
|
|
1627
|
+
# Filter by time range (using datetime objects)
|
|
1628
|
+
>>> from datetime import datetime, timedelta
|
|
1629
|
+
>>> start = datetime.utcnow() - timedelta(hours=1)
|
|
1630
|
+
>>> end = datetime.utcnow()
|
|
1631
|
+
>>> print(mem.render(time_range=(start, end)))
|
|
1632
|
+
|
|
1633
|
+
# Limit number of events/messages
|
|
1634
|
+
>>> print(mem.render(event_limit=5))
|
|
1635
|
+
|
|
1636
|
+
# Get all events of specific types
|
|
1637
|
+
>>> mem.get_events(event_types=['msg', 'ref'])
|
|
1638
|
+
|
|
1639
|
+
------------------------------------------------------------
|
|
1640
|
+
9. Example: Full Workflow
|
|
1641
|
+
------------------------------------------------------------
|
|
1642
|
+
|
|
1643
|
+
>>> mem = MEMORY()
|
|
1644
|
+
>>> mem.add_msg('user', 'What is the weather today?', channel='webapp')
|
|
1645
|
+
>>> mem.add_msg('assistant', 'The weather is sunny and warm.', channel='webapp')
|
|
1646
|
+
>>> mem.set_var('weather', 'sunny and warm', desc='Latest weather info')
|
|
1647
|
+
>>> mem.add_ref('User is interested in outdoor activities.')
|
|
1648
|
+
>>> mem.add_log('Weather query processed successfully.')
|
|
1649
|
+
>>> print(mem.render(output_format='conversation'))
|
|
1650
|
+
|
|
1651
|
+
# Export all events and rehydrate
|
|
1652
|
+
>>> all_events = mem.get_events()
|
|
1653
|
+
>>> mem_restored = MEMORY.from_events(all_events, mem.id)
|
|
1654
|
+
|
|
1655
|
+
------------------------------------------------------------
|
|
1656
|
+
For more details, see the MEMORY class docstring and method documentation.
|
|
1657
|
+
------------------------------------------------------------
|
|
1658
|
+
"""
|