thoughtflow 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,142 +1,1658 @@
1
1
  """
2
- Base memory interface for ThoughtFlow.
2
+ MEMORY class for ThoughtFlow.
3
3
 
4
- Memory hooks provide a clean pattern for memory integration:
5
- - Memory retrieval produces context items
6
- - Those items are explicitly inserted into the message list
7
- - Memory writes are explicit events emitted by the agent run
8
-
9
- This avoids:
10
- - Hidden memory mutation
11
- - "Where did this context come from?"
12
- - Irreproducible behavior across runs
4
+ The MEMORY class serves as an event-sourced state container for managing events,
5
+ logs, messages, reflections, and variables within the Thoughtflow framework.
13
6
  """
14
7
 
15
8
  from __future__ import annotations
16
9
 
17
- from abc import ABC, abstractmethod
18
- from dataclasses import dataclass, field
19
- from datetime import datetime
20
- from typing import Any
10
+ import json
11
+ import copy
12
+ import pickle
13
+ import pprint
14
+ import datetime as dtt
15
+
16
+ from thoughtflow._util import (
17
+ event_stamp,
18
+ VAR_DELETED,
19
+ compress_to_json,
20
+ decompress_from_json,
21
+ estimate_size,
22
+ is_obj_ref,
23
+ truncate_content,
24
+ tz_bog,
25
+ tz_utc,
26
+ )
21
27
 
22
28
 
23
- @dataclass
24
- class MemoryEvent:
25
- """An event representing a memory operation.
29
+ class MEMORY:
30
+ """
31
+ The MEMORY class serves as an event-sourced state container for managing events,
32
+ logs, messages, reflections, and variables within the Thoughtflow framework.
33
+
34
+ All state changes are stored as events with sortable IDs (alphabetical = chronological).
35
+ Events are stored in a dictionary for O(1) lookup, with separate sorted indexes for
36
+ efficient retrieval. The memory can be fully reconstructed from its event list.
26
37
 
27
- Captured in traces to maintain full visibility into memory interactions.
38
+ Architecture:
39
+ - DATA LAYER: events dict (stamp → event object) - single source of truth
40
+ - INDEX LAYER: idx_* lists of [timestamp, stamp] pairs, sorted chronologically
41
+ - VARIABLE LAYER: vars dict with full history as list of [stamp, value] pairs
42
+ - OBJECT LAYER: objects dict for compressed large data storage
28
43
 
29
44
  Attributes:
30
- event_type: Type of event (retrieve, store, delete).
31
- timestamp: When the event occurred.
32
- query: The retrieval query (for retrieve events).
33
- content: The content being stored (for store events).
34
- results: Retrieved memories (for retrieve events).
35
- metadata: Additional event metadata.
45
+ id (str): Unique identifier for this MEMORY instance (event_stamp).
46
+ events (dict): Dictionary mapping event stamps to full event objects.
47
+ idx_msgs (list): Sorted list of [timestamp, stamp] pairs for messages.
48
+ idx_refs (list): Sorted list of [timestamp, stamp] pairs for reflections.
49
+ idx_logs (list): Sorted list of [timestamp, stamp] pairs for logs.
50
+ idx_vars (list): Sorted list of [timestamp, stamp] pairs for variable changes.
51
+ idx_all (list): Master sorted list of all [timestamp, stamp] pairs.
52
+ vars (dict): Dictionary mapping variable names to list of [stamp, value] pairs.
53
+ Deleted variables have VAR_DELETED as the value in their last entry.
54
+ Large values auto-convert to object references: {'_obj_ref': stamp}.
55
+ var_desc_history (dict): Dictionary mapping variable names to list of [stamp, description] pairs.
56
+ Tracks description evolution separately from value changes.
57
+ objects (dict): Dictionary mapping stamps to compressed object dicts.
58
+ Each object is JSON-serializable with base64-encoded compressed data.
59
+ object_threshold (int): Size threshold (bytes) for auto-converting vars to objects.
60
+ valid_roles (set): Set of valid roles for messages.
61
+ valid_modes (set): Set of valid modes for messages.
62
+ valid_channels (set): Set of valid communication channels.
63
+
64
+ Methods:
65
+ add_msg(role, content, mode='text', channel='unknown'): Add a message event with channel.
66
+ add_log(message): Add a log event.
67
+ add_ref(content): Add a reflection event.
68
+ get_msgs(...): Retrieve messages with filtering (supports channel filter).
69
+ get_events(...): Retrieve all events with filtering.
70
+ get_logs(limit=-1): Get log events.
71
+ get_refs(limit=-1): Get reflection events.
72
+ last_user_msg(): Get the last user message content.
73
+ last_asst_msg(): Get the last assistant message content.
74
+ last_sys_msg(): Get the last system message content.
75
+ last_log_msg(): Get the last log message content.
76
+ prepare_context(...): Prepare messages for LLM with smart truncation of old messages.
77
+ set_var(key, value, desc=''): Set a variable (appends to history, auto-converts large values to objects).
78
+ del_var(key): Mark a variable as deleted (preserves history).
79
+ get_var(key, resolve_refs=True): Get current value (auto-resolves object refs).
80
+ get_all_vars(resolve_refs=True): Get dict of all current non-deleted values.
81
+ get_var_history(key, resolve_refs=False): Get full history as list of [stamp, value].
82
+ get_var_desc(key): Get the current description of a variable.
83
+ get_var_desc_history(key): Get full description history as list of [stamp, description].
84
+ is_var_deleted(key): Check if a variable is currently marked as deleted.
85
+ set_obj(data, name=None, desc='', content_type='auto'): Store compressed object, optionally link to variable.
86
+ get_obj(stamp): Retrieve and decompress an object by stamp.
87
+ get_obj_info(stamp): Get object metadata without decompressing.
88
+ snapshot(): Export memory state as dict (includes events and objects).
89
+ save(filename, compressed=False): Save memory to file (pickle format).
90
+ load(filename, compressed=False): Load memory from file (pickle format).
91
+ to_json(filename=None, indent=2): Export memory to JSON file or string.
92
+ from_json(source): Class method to load memory from JSON file or string.
93
+ copy(): Return a deep copy of the MEMORY instance.
94
+ from_events(event_list, memory_id=None, objects=None): Class method to rehydrate from events/objects.
95
+
96
+ Example Usage:
97
+ memory = MEMORY()
98
+
99
+ # Messages have channel tracking (for omni-directional communication)
100
+ memory.add_msg('user', 'Hello!', channel='webapp')
101
+ memory.add_msg('assistant', 'Hi there!', channel='webapp')
102
+
103
+ # Logs and reflections are internal (no channel)
104
+ memory.add_log('User greeted the assistant')
105
+ memory.add_ref('User seems friendly')
106
+
107
+ # Variables maintain full history (no channel needed)
108
+ memory.set_var('foo', 42, 'A test variable')
109
+ memory.set_var('foo', 100) # Appends to history
110
+ memory.get_var('foo') # Returns 100
111
+ memory.get_var_history('foo') # Returns [[stamp1, 42], [stamp2, 100]]
112
+
113
+ # Deletion is a tombstone, not removal
114
+ memory.del_var('foo')
115
+ memory.get_var('foo') # Returns None
116
+ memory.is_var_deleted('foo') # Returns True
117
+ memory.set_var('foo', 200) # Can re-set after deletion
118
+
119
+ # Large values auto-convert to compressed objects
120
+ large_data = 'x' * 20000 # Exceeds default 10KB threshold
121
+ memory.set_var('big_data', large_data) # Auto-converts to object
122
+ memory.get_var('big_data') # Returns decompressed data
123
+ memory.get_var('big_data', resolve_refs=False) # Returns {'_obj_ref': stamp}
124
+
125
+ # Direct object storage
126
+ stamp = memory.set_obj(image_bytes, name='avatar', desc='User avatar')
127
+ memory.get_var('avatar') # Returns decompressed image_bytes
128
+ memory.get_obj(stamp) # Direct access by stamp
129
+ memory.get_obj_info(stamp) # Metadata without decompressing
130
+
131
+ # Inspect internal state (public attributes)
132
+ print(memory.events) # All events by stamp
133
+ print(memory.objects) # All objects by stamp
134
+ print(memory.vars) # Variable histories
135
+
136
+ memory.save('memory.pkl')
137
+ memory2 = MEMORY()
138
+ memory2.load('memory.pkl')
139
+
140
+ # Export to JSON (like DataFrame.to_csv)
141
+ memory.to_json('memory_backup.json')
142
+ memory4 = MEMORY.from_json('memory_backup.json')
143
+
144
+ # Rehydrate from events and objects (preserves all history)
145
+ snap = memory.snapshot()
146
+ memory3 = MEMORY.from_events(snap['events'].values(), objects=snap['objects'])
36
147
  """
37
148
 
38
- event_type: str # "retrieve", "store", "delete"
39
- timestamp: datetime = field(default_factory=datetime.now)
40
- query: str | None = None
41
- content: str | None = None
42
- results: list[dict[str, Any]] = field(default_factory=list)
43
- metadata: dict[str, Any] = field(default_factory=dict)
149
def __init__(self):
    """
    Create an empty MEMORY: no events, empty indexes, no variables or objects.

    The instance receives a fresh id from event_stamp() and the default sets of
    accepted roles, modes and channels.
    """
    import bisect
    self._bisect = bisect  # kept on the instance; _add_to_index calls it from here

    self.id = event_stamp()

    # DATA LAYER: stamp -> full event dict
    self.events = {}

    # INDEX LAYER: each index is a list of [timestamp, stamp] pairs.
    # idx_all is the master index; the others are per event type.
    for index_name in ('idx_msgs', 'idx_refs', 'idx_logs', 'idx_vars', 'idx_all'):
        setattr(self, index_name, [])

    # VARIABLE LAYER
    self.vars = {}              # var_name -> [[stamp, value], ...]
    self.var_desc_history = {}  # var_name -> [[stamp, description], ...]

    # OBJECT LAYER: stamp -> compressed object dict
    self.objects = {}

    # Values whose estimated size exceeds this many bytes are stored as objects.
    self.object_threshold = 10000

    # Accepted values, used by add_msg for validation
    self.valid_roles = {
        'system', 'user', 'assistant', 'reflection',
        'action', 'query', 'result', 'logger',
    }
    self.valid_modes = {'text', 'audio', 'voice'}
    self.valid_channels = {
        'webapp', 'ios', 'android', 'telegram', 'whatsapp',
        'slack', 'api', 'cli', 'unknown',
    }
44
212
 
45
- def to_dict(self) -> dict[str, Any]:
46
- """Convert to a serializable dict.
213
+ #--- Internal Methods ---
47
214
 
215
+ def _add_to_index(self, index_list, timestamp, stamp):
216
+ """
217
+ Insert [timestamp, stamp] pair maintaining sorted order by timestamp.
218
+
219
+ Args:
220
+ index_list: One of the idx_* lists
221
+ timestamp: ISO timestamp string (dt_utc)
222
+ stamp: Event stamp ID
223
+ """
224
+ # bisect.insort sorts by first element of tuple/list (timestamp)
225
+ self._bisect.insort(index_list, [timestamp, stamp])
226
+
227
+ def _store_event(self, event_type, obj):
228
+ """
229
+ Store event in data layer and add to appropriate indexes.
230
+ This is the single entry point for all event creation.
231
+
232
+ Args:
233
+ event_type: One of 'msg', 'ref', 'log', 'var'
234
+ obj: The full event dict (must contain 'stamp' and 'dt_utc' keys)
235
+ """
236
+ stamp = obj['stamp']
237
+ timestamp = obj['dt_utc']
238
+
239
+ # Store in data layer
240
+ self.events[stamp] = obj
241
+
242
+ # Add to type-specific index (with [timestamp, stamp] format)
243
+ if event_type == 'msg':
244
+ self._add_to_index(self.idx_msgs, timestamp, stamp)
245
+ elif event_type == 'ref':
246
+ self._add_to_index(self.idx_refs, timestamp, stamp)
247
+ elif event_type == 'log':
248
+ self._add_to_index(self.idx_logs, timestamp, stamp)
249
+ elif event_type == 'var':
250
+ self._add_to_index(self.idx_vars, timestamp, stamp)
251
+
252
+ # Always add to master index
253
+ self._add_to_index(self.idx_all, timestamp, stamp)
254
+
255
+ def _get_events_from_index(self, index, limit=-1):
256
+ """
257
+ Get events from an index, optionally limited to last N.
258
+
259
+ Args:
260
+ index: One of the idx_* lists (format: [[timestamp, stamp], ...])
261
+ limit: Max events to return (-1 = all)
262
+
48
263
  Returns:
49
- Dict representation of the event.
264
+ List of event dicts
50
265
  """
51
- return {
52
- "event_type": self.event_type,
53
- "timestamp": self.timestamp.isoformat(),
54
- "query": self.query,
55
- "content": self.content,
56
- "results": self.results,
57
- "metadata": self.metadata,
266
+ pairs = index if limit <= 0 else index[-limit:]
267
+ # Extract stamp (second element) from each [timestamp, stamp] pair
268
+ return [self.events[ts_stamp[1]] for ts_stamp in pairs if ts_stamp[1] in self.events]
269
+
270
+ def _get_latest_desc(self, key):
271
+ """
272
+ Get the latest description for a variable from its description history.
273
+
274
+ Args:
275
+ key: Variable name
276
+
277
+ Returns:
278
+ Latest description string, or empty string if none exists
279
+ """
280
+ history = self.var_desc_history.get(key)
281
+ if not history:
282
+ return ''
283
+ return history[-1][1] # Return description from last [stamp, desc] pair
284
+
285
+ #--- Public Methods ---
286
+
287
def add_msg(self, role, content, mode='text', channel='unknown'):
    """
    Add a message event with channel tracking.

    Args:
        role: Message role; must be in self.valid_roles
        content: Message content
        mode: Communication mode; must be in self.valid_modes
        channel: Communication channel; must be in self.valid_channels

    Raises:
        ValueError: If role, mode or channel is not an accepted value
    """
    if role not in self.valid_roles:
        raise ValueError("Invalid role '{}'. Must be one of: {}".format(role, sorted(self.valid_roles)))
    if mode not in self.valid_modes:
        raise ValueError("Invalid mode '{}'. Must be one of: {}".format(mode, sorted(self.valid_modes)))
    if channel not in self.valid_channels:
        raise ValueError("Invalid channel '{}'. Must be one of: {}".format(channel, sorted(self.valid_channels)))

    stamp = event_stamp({'role': role, 'content': content})

    # Read the clock once so both timestamps describe the same instant.
    now_utc = dtt.datetime.now(tz_utc)
    now_bog = now_utc.astimezone(tz_bog)

    # str(datetime) omits the fractional part when microsecond == 0, so slicing
    # it to 23 characters would cut into the UTC offset. isoformat with
    # timespec='milliseconds' always emits three fractional digits, so the
    # first 23 characters are always 'YYYY-MM-DD HH:MM:SS.mmm'.
    msg = {
        'stamp'   : stamp,
        'type'    : 'msg',
        'role'    : role,
        'content' : content,
        'mode'    : mode,
        'channel' : channel,
        'dt_bog'  : now_bog.isoformat(sep=' ', timespec='milliseconds')[:23],
        'dt_utc'  : now_utc.isoformat(sep=' ', timespec='milliseconds')[:23],
    }
    self._store_event('msg', msg)
59
316
 
317
def add_log(self, message):
    """
    Add a log event (role 'logger', mode 'text').

    Args:
        message: Log message content
    """
    stamp = event_stamp({'content': message})

    # Read the clock once so both timestamps describe the same instant.
    now_utc = dtt.datetime.now(tz_utc)
    now_bog = now_utc.astimezone(tz_bog)

    # str(datetime) omits the fractional part when microsecond == 0, so slicing
    # it to 23 characters would cut into the UTC offset. isoformat with
    # timespec='milliseconds' always yields 'YYYY-MM-DD HH:MM:SS.mmm' first.
    log_entry = {
        'stamp'   : stamp,
        'type'    : 'log',
        'role'    : 'logger',
        'content' : message,
        'mode'    : 'text',
        'dt_bog'  : now_bog.isoformat(sep=' ', timespec='milliseconds')[:23],
        'dt_utc'  : now_utc.isoformat(sep=' ', timespec='milliseconds')[:23],
    }
    self._store_event('log', log_entry)
60
335
 
61
- class MemoryHook(ABC):
62
- """Abstract base class for memory integrations.
63
-
64
- Memory hooks allow agents to:
65
- - Retrieve relevant context from long-term memory
66
- - Store new information for future retrieval
67
- - Maintain conversation history beyond context window
68
-
69
- Implementations might include:
70
- - Vector database (Pinecone, Weaviate, ChromaDB)
71
- - Key-value store
72
- - SQL database
73
- - File-based storage
74
-
75
- Example:
76
- >>> class SimpleMemory(MemoryHook):
77
- ... def __init__(self):
78
- ... self.memories = []
79
- ...
80
- ... def retrieve(self, query, k=5):
81
- ... # Simple keyword matching (real impl would use embeddings)
82
- ... matches = [m for m in self.memories if query.lower() in m["content"].lower()]
83
- ... return matches[:k]
84
- ...
85
- ... def store(self, content, metadata=None):
86
- ... self.memories.append({"content": content, "metadata": metadata or {}})
87
- """
336
def add_ref(self, content):
    """
    Add a reflection event (role 'reflection', mode 'text').

    Args:
        content: Reflection content
    """
    stamp = event_stamp({'content': content})

    # Read the clock once so both timestamps describe the same instant.
    now_utc = dtt.datetime.now(tz_utc)
    now_bog = now_utc.astimezone(tz_bog)

    # str(datetime) omits the fractional part when microsecond == 0, so slicing
    # it to 23 characters would cut into the UTC offset. isoformat with
    # timespec='milliseconds' always yields 'YYYY-MM-DD HH:MM:SS.mmm' first.
    ref = {
        'stamp'   : stamp,
        'type'    : 'ref',
        'role'    : 'reflection',
        'content' : content,
        'mode'    : 'text',
        'dt_bog'  : now_bog.isoformat(sep=' ', timespec='milliseconds')[:23],
        'dt_utc'  : now_utc.isoformat(sep=' ', timespec='milliseconds')[:23],
    }
    self._store_event('ref', ref)
88
354
 
89
- @abstractmethod
90
- def retrieve(
91
- self,
92
- query: str,
93
- k: int = 5,
94
- filters: dict[str, Any] | None = None,
95
- ) -> list[dict[str, Any]]:
96
- """Retrieve relevant memories for a query.
355
+ #---
97
356
 
357
def get_msgs(self,
             limit=-1,
             include=None,
             exclude=None,
             repr='list',
             channel=None,
             ):
    """
    Return message events, filtered and rendered as requested.

    Filters are applied in order (include, exclude, channel); `limit` is
    applied afterwards, to the filtered result.

    Args:
        limit: Max messages to return, counted from the end (-1 = all)
        include: Roles to keep (None or empty = all)
        exclude: Roles to drop (None or empty = none)
        repr: Output format ('list', 'str', 'pprint1')
        channel: Keep only this channel (None or empty = all)

    Returns:
        'list': list of event dicts; 'str': one "role: content" line per
        message; 'pprint1': pprint-formatted string of the event list

    Raises:
        ValueError: If repr is not one of the supported formats
    """
    selected = self._get_events_from_index(self.idx_msgs, -1)

    if include:
        selected = list(filter(lambda ev: ev.get('role') in include, selected))
    if exclude:
        selected = list(filter(lambda ev: ev.get('role') not in exclude, selected))
    if channel:
        selected = list(filter(lambda ev: ev.get('channel') == channel, selected))

    if limit > 0:
        selected = selected[-limit:]

    if repr == 'list':
        return selected
    if repr == 'str':
        lines = ["{}: {}".format(ev['role'], ev['content']) for ev in selected]
        return '\n'.join(lines)
    if repr == 'pprint1':
        return pprint.pformat(selected, indent=1)
    raise ValueError("Invalid repr option. Choose from 'list', 'str', or 'pprint1'.")
102
400
 
401
def get_events(self, limit=-1, event_types=None, channel=None):
    """
    Return events from the master index, optionally filtered.

    Filters run first (type, then channel); `limit` is applied to what is left.

    Args:
        limit: Max events, counted from the end (-1 = all)
        event_types: Types to keep, e.g. ['msg', 'log', 'ref', 'var'] (None or empty = all)
        channel: Keep only events on this channel (None or empty = all)

    Returns:
        List of event dicts
    """
    selected = self._get_events_from_index(self.idx_all, -1)

    if event_types:
        selected = list(filter(lambda ev: ev.get('type') in event_types, selected))
    if channel:
        selected = list(filter(lambda ev: ev.get('channel') == channel, selected))

    return selected[-limit:] if limit > 0 else selected
424
+
425
def get_logs(self, limit=-1):
    """
    Return log events from the log index.

    Args:
        limit: Max logs to return, counted from the end (-1 = all)

    Returns:
        List of log event dicts
    """
    log_events = self._get_events_from_index(self.idx_logs, -1)
    return log_events[-limit:] if limit > 0 else log_events
441
+
442
def get_refs(self, limit=-1):
    """
    Return reflection events from the reflection index.

    Args:
        limit: Max reflections to return, counted from the end (-1 = all)

    Returns:
        List of reflection event dicts
    """
    reflections = self._get_events_from_index(self.idx_refs, -1)
    return reflections[-limit:] if limit > 0 else reflections
458
+
459
def last_user_msg(self):
    """Return the content of the most recent 'user' message, or '' if there is none."""
    user_msgs = self.get_msgs(include=['user'])
    if not user_msgs:
        return ''
    return user_msgs[-1]['content']
463
+
464
def last_asst_msg(self):
    """Return the content of the most recent 'assistant' message, or '' if there is none."""
    assistant_msgs = self.get_msgs(include=['assistant'])
    if not assistant_msgs:
        return ''
    return assistant_msgs[-1]['content']
468
+
469
def last_sys_msg(self):
    """Return the content of the most recent 'system' message, or '' if there is none."""
    system_msgs = self.get_msgs(include=['system'])
    if not system_msgs:
        return ''
    return system_msgs[-1]['content']
473
+
474
def last_log_msg(self):
    """Return the content of the most recent log event, or '' if there is none."""
    log_events = self.get_logs()
    if not log_events:
        return ''
    return log_events[-1]['content']
107
478
 
108
- @abstractmethod
109
- def store(
479
def prepare_context(
    self,
    recent_count=6,
    truncate_threshold=500,
    header_len=200,
    footer_len=200,
    include_roles=('user', 'assistant'),
    format='list',
):
    """
    Build an LLM-ready message list, shortening older messages.

    The last `recent_count` messages are passed through untouched. Every
    earlier message is handed to truncate_content together with its stamp,
    `truncate_threshold`, `header_len` and `footer_len`.

    Args:
        recent_count: Number of most recent messages left untouched (default 6)
        truncate_threshold: Length threshold passed to truncate_content (default 500)
        header_len: Characters to keep from the start (default 200)
        footer_len: Characters to keep from the end (default 200)
        include_roles: Roles to include (default ('user', 'assistant'))
        format: 'openai' yields dicts with only 'role' and 'content'; any other
            value yields dicts that also carry 'stamp' and 'truncated'

    Returns:
        List of message dicts, oldest first; an empty list when there are no
        messages for the requested roles. 'truncated' is True for an older
        message whose original content is longer than `truncate_threshold`.

    Example:
        context = memory.prepare_context(format='openai')
        response = client.chat.completions.create(model='gpt-4', messages=context)
    """
    history = self.get_msgs(include=list(include_roles))
    if not history:
        return []

    # Messages positioned before this index are the "older" ones.
    first_recent = max(0, len(history) - recent_count)

    prepared = []
    for position, event in enumerate(history):
        stamp = event.get('stamp', '')
        role = event.get('role', 'user')
        original_text = event.get('content', '')
        is_old = position < first_recent

        if is_old:
            text = truncate_content(
                original_text,
                stamp,
                threshold=truncate_threshold,
                header_len=header_len,
                footer_len=footer_len
            )
        else:
            text = original_text

        entry = {'role': role, 'content': text}
        if format != 'openai':
            # The list format carries extra metadata alongside role/content.
            entry['stamp'] = stamp
            entry['truncated'] = is_old and len(original_text) > truncate_threshold
        prepared.append(entry)

    return prepared
560
+
561
+ #---
562
+
563
def set_var(self, key, value, desc=''):
    """
    Store a variable by appending to its history list.

    Each call appends a [stamp, value] pair to the variable's history and
    records a 'var' event. When the estimated size of the value exceeds
    self.object_threshold, the value is compressed into self.objects and the
    history and event hold an object reference ({'_obj_ref': stamp}) instead.

    Descriptions are tracked separately in var_desc_history and are only
    appended when a non-empty `desc` is given.

    Args:
        key: Variable name
        value: Variable value (any type)
        desc: Optional description (appended to description history if provided)
    """
    # Large values are stored as compressed objects and referenced by stamp.
    if estimate_size(value) > self.object_threshold:
        obj_stamp = event_stamp({'obj': str(value)[:50]})
        self.objects[obj_stamp] = compress_to_json(value)
        stored_value = {'_obj_ref': obj_stamp}
    else:
        stored_value = value

    stamp = event_stamp({'var': key, 'value': str(value)[:100]})

    # Append to the value history, creating it for a new variable.
    self.vars.setdefault(key, []).append([stamp, stored_value])

    # Description history only grows when a description is supplied.
    if desc:
        self.var_desc_history.setdefault(key, []).append([stamp, desc])
    current_desc = desc if desc else self._get_latest_desc(key)

    # Read the clock once so both timestamps describe the same instant.
    now_utc = dtt.datetime.now(tz_utc)
    now_bog = now_utc.astimezone(tz_bog)

    # str(datetime) omits the fractional part when microsecond == 0, so slicing
    # it to 23 characters would cut into the UTC offset. isoformat with
    # timespec='milliseconds' always yields 'YYYY-MM-DD HH:MM:SS.mmm' first.
    var_event = {
        'stamp'    : stamp,
        'type'     : 'var',
        'role'     : 'system',
        'var_name' : key,
        'var_value': stored_value,  # reference if large, else the value itself
        'var_desc' : current_desc,
        'content'  : "Variable '{}' set".format(key) + (' (as object ref)' if is_obj_ref(stored_value) else ''),
        'mode'     : 'text',
        'dt_bog'   : now_bog.isoformat(sep=' ', timespec='milliseconds')[:23],
        'dt_utc'   : now_utc.isoformat(sep=' ', timespec='milliseconds')[:23],
    }
    self._store_event('var', var_event)
623
+
624
def del_var(self, key):
    """
    Mark a variable as deleted by appending a VAR_DELETED tombstone.

    The variable's history is preserved; the tombstone is simply the newest
    entry, and a 'var' event with var_deleted=True is recorded.

    Args:
        key: Variable name to delete

    Raises:
        KeyError: If the variable doesn't exist
    """
    if key not in self.vars:
        raise KeyError("Variable '{}' does not exist".format(key))

    stamp = event_stamp({'var': key, 'action': 'delete'})

    # Tombstone entry in the value history
    self.vars[key].append([stamp, VAR_DELETED])

    # Read the clock once so both timestamps describe the same instant.
    now_utc = dtt.datetime.now(tz_utc)
    now_bog = now_utc.astimezone(tz_bog)

    # str(datetime) omits the fractional part when microsecond == 0, so slicing
    # it to 23 characters would cut into the UTC offset. isoformat with
    # timespec='milliseconds' always yields 'YYYY-MM-DD HH:MM:SS.mmm' first.
    var_event = {
        'stamp'    : stamp,
        'type'     : 'var',
        'role'     : 'system',
        'var_name' : key,
        'var_value': None,
        'var_deleted': True,
        'var_desc' : self._get_latest_desc(key),
        'content'  : "Variable '{}' deleted".format(key),
        'mode'     : 'text',
        'dt_bog'   : now_bog.isoformat(sep=' ', timespec='milliseconds')[:23],
        'dt_utc'   : now_utc.isoformat(sep=' ', timespec='milliseconds')[:23],
    }
    self._store_event('var', var_event)
658
+
659
def get_var(self, key, resolve_refs=True):
    """
    Return the current value of a variable.

    Args:
        key: Variable name
        resolve_refs: If True (default), an object reference is resolved
            through get_obj and the stored data is returned; if False the
            reference itself is returned

    Returns:
        Current value, or None if the variable has no history or its latest
        entry is the VAR_DELETED tombstone
    """
    entries = self.vars.get(key)
    if not entries:
        return None

    _stamp, latest = entries[-1]
    if latest is VAR_DELETED:
        return None

    wants_object = resolve_refs and is_obj_ref(latest)
    return self.get_obj(latest['_obj_ref']) if wants_object else latest
689
+
690
def is_var_deleted(self, key):
    """
    Tell whether a variable's latest history entry is the deletion tombstone.

    Args:
        key: Variable name

    Returns:
        True if the variable has history and its last value is VAR_DELETED,
        False otherwise (including unknown variables)
    """
    entries = self.vars.get(key)
    if entries:
        _stamp, latest = entries[-1]
        return latest is VAR_DELETED
    return False
706
+
707
def get_all_vars(self, resolve_refs=True):
    """
    Collect the current value of every variable that is not deleted.

    Args:
        resolve_refs: If True (default), object references are resolved
            through get_obj; if False they are returned as stored

    Returns:
        dict: Variable name → current value. Variables with an empty history
        or whose latest entry is VAR_DELETED are left out.
    """
    current = {}
    for name, entries in self.vars.items():
        if not entries:
            continue
        _stamp, latest = entries[-1]
        if latest is VAR_DELETED:
            continue
        if resolve_refs and is_obj_ref(latest):
            current[name] = self.get_obj(latest['_obj_ref'])
        else:
            current[name] = latest
    return current
115
728
 
729
def get_var_history(self, key, resolve_refs=False):
    """
    Return the full history of a variable as [stamp, value] pairs.

    Args:
        key: Variable name
        resolve_refs: If True, values that are object references are replaced
            by the result of get_obj. Default False returns the raw history.

    Returns:
        A new list of [stamp, value] pairs, or an empty list if the variable
        doesn't exist. With resolve_refs=False the list is a shallow copy, so
        the pairs themselves are the stored ones. Deletion entries carry
        VAR_DELETED as their value.
    """
    entries = self.vars.get(key, [])
    if resolve_refs:
        return [
            [stamp, self.get_obj(value['_obj_ref']) if is_obj_ref(value) else value]
            for stamp, value in entries
        ]
    return list(entries)
119
756
 
757
def get_var_desc(self, key):
    """
    Return the latest description of a variable.

    Args:
        key: Variable name

    Returns:
        Latest description string, or "No description found." when the
        variable has no (non-empty) description
    """
    latest = self._get_latest_desc(key)
    if latest:
        return latest
    return "No description found."
124
769
 
125
- def delete(self, memory_id: str) -> bool:
126
- """Delete a memory by ID.
770
def get_var_desc_history(self, key):
    """
    Return every description recorded for a variable.

    Args:
        key: Variable name

    Returns:
        A new list of [stamp, description] pairs (shallow copy of the stored
        history), or an empty list if the variable has no descriptions.
    """
    recorded = self.var_desc_history.get(key, [])
    return list(recorded)
781
+
782
+ #--- Object Methods ---
127
783
 
784
def set_obj(self, data, name=None, desc='', content_type='auto'):
    """
    Store an object in compressed form, optionally linking it to a variable.

    The data is passed to compress_to_json and the result is kept in
    self.objects under a new stamp. When `name` is given, a variable of that
    name is set to an object reference ({'_obj_ref': stamp}) and a 'var'
    event is recorded, without going through set_var's size check.

    Args:
        data: The data to store
        name: Optional variable name to create a reference
        desc: Description (used only if name is provided)
        content_type: 'bytes', 'text', 'json', 'pickle', or 'auto'
            (forwarded to compress_to_json)

    Returns:
        str: The object stamp (ID)

    Example:
        # Store raw data, get stamp back
        stamp = memory.set_obj(large_text)

        # Store and create variable reference
        memory.set_obj(image_bytes, name='profile_pic', desc='User avatar')
        memory.get_var('profile_pic')
    """
    stamp = event_stamp({'obj': str(data)[:50]})

    # Compress and store
    self.objects[stamp] = compress_to_json(data, content_type)

    if name:
        obj_ref = {'_obj_ref': stamp}
        var_stamp = event_stamp({'var': name})

        # Append the reference to the variable's history, creating it if needed.
        self.vars.setdefault(name, []).append([var_stamp, obj_ref])

        # Description history only grows when a description is supplied.
        if desc:
            self.var_desc_history.setdefault(name, []).append([var_stamp, desc])
        current_desc = desc if desc else self._get_latest_desc(name)

        # Read the clock once so both timestamps describe the same instant.
        now_utc = dtt.datetime.now(tz_utc)
        now_bog = now_utc.astimezone(tz_bog)

        # str(datetime) omits the fractional part when microsecond == 0, so
        # slicing it to 23 characters would cut into the UTC offset. isoformat
        # with timespec='milliseconds' always yields 'YYYY-MM-DD HH:MM:SS.mmm'.
        var_event = {
            'type'     : 'var',
            'stamp'    : var_stamp,
            'role'     : 'system',  # same role as the var events from set_var/del_var
            'var_name' : name,
            'var_value': obj_ref,  # the reference, not the data
            'var_deleted': False,
            'var_desc' : current_desc,
            'content'  : "Variable '{}' set to object ref: {}".format(name, stamp),
            'mode'     : 'text',
            'dt_bog'   : now_bog.isoformat(sep=' ', timespec='milliseconds')[:23],
            'dt_utc'   : now_utc.isoformat(sep=' ', timespec='milliseconds')[:23],
        }
        self._store_event('var', var_event)

    return stamp
130
852
 
853
def get_obj(self, stamp):
    """
    Look up a stored object by stamp and return it decompressed.

    Args:
        stamp: The object's event stamp

    Returns:
        Whatever decompress_from_json() produces for the stored entry,
        or None when no object is stored under `stamp`.

    Example:
        data = memory.get_obj('A1B2C3...')
    """
    packed = self.objects.get(stamp)
    return None if packed is None else decompress_from_json(packed)
135
870
 
136
- def clear(self) -> int:
137
- """Clear all memories.
871
def get_obj_info(self, stamp):
    """
    Describe a stored object without decompressing it.

    Args:
        stamp: The object's event stamp

    Returns:
        dict with 'stamp', 'size_original', 'size_compressed',
        'content_type' and 'compression_ratio' (compressed / original,
        or 0 when the original size is not positive), or None when no
        object is stored under `stamp`.
    """
    entry = self.objects.get(stamp)
    if entry is None:
        return None

    original_size = entry['size_original']
    compressed_size = entry['size_compressed']
    kind = entry['content_type']

    # Guard against division by zero for empty payloads.
    if original_size > 0:
        ratio = compressed_size / original_size
    else:
        ratio = 0

    return {
        'stamp': stamp,
        'size_original': original_size,
        'size_compressed': compressed_size,
        'content_type': kind,
        'compression_ratio': ratio,
    }
891
+
892
+ #---
138
893
 
894
def snapshot(self):
    """
    Export the memory state as a plain dict.

    Only the id, the events and the stored objects are exported; the
    indexes are not included because they can be rebuilt from the events.

    Returns:
        dict with 'id', 'events', and 'objects' keys. The two mappings
        are shallow copies, so adding or removing keys on the snapshot
        does not affect this instance.
    """
    state = {'id': self.id}
    state['events'] = dict(self.events)    # all events by stamp
    state['objects'] = dict(self.objects)  # all objects by stamp
    return state
907
+
908
def save(self, filename, compressed=False):
    """
    Pickle the current snapshot() to a file.

    Args:
        filename: Path to save file
        compressed: If True, write through gzip; otherwise write a plain
            binary file
    """
    import gzip

    payload = self.snapshot()
    # Both openers are used the same way, so pick one and share the write.
    opener = gzip.open if compressed else open
    with opener(filename, 'wb') as fh:
        pickle.dump(payload, fh)
924
+
925
def load(self, filename, compressed=False):
    """
    Replace this instance's state with the contents of a saved file.

    The file is unpickled, a fresh MEMORY is rebuilt from its events via
    MEMORY.from_events(), and that instance's state is then copied onto
    `self` attribute by attribute.

    NOTE: the file is read with pickle.load, which can execute arbitrary
    code while loading. Only load files from a trusted source.

    Args:
        filename: Path to load file
        compressed: If True, expect gzip compression
    """
    import gzip

    opener = gzip.open if compressed else open
    with opener(filename, 'rb') as fh:
        payload = pickle.load(fh)

    # Rehydrate from events (objects are passed through if present).
    restored = MEMORY.from_events(
        list(payload.get('events', {}).values()),
        payload.get('id'),
        objects=payload.get('objects', {}),
    )

    # Adopt the rebuilt state, in the same order as the attributes are listed.
    for attr in (
        'id', 'events',
        'idx_msgs', 'idx_refs', 'idx_logs', 'idx_vars', 'idx_all',
        'vars', 'var_desc_history', 'objects',
    ):
        setattr(self, attr, getattr(restored, attr))
957
+
958
def copy(self):
    """Return an independent deep copy of this MEMORY instance."""
    duplicate = copy.deepcopy(self)
    return duplicate
961
+
962
def to_json(self, filename=None, indent=2):
    """
    Export memory to JSON format.

    Serializes the id, events, objects, variable histories, description
    histories and all indexes. The result can be read back with
    from_json().

    Args:
        filename: If provided, write to file. Otherwise return JSON string.
        indent: JSON indentation level (default 2, use None for compact)

    Returns:
        JSON string if filename is None, else None

    Example:
        # Save to file
        memory.to_json('memory_backup.json')

        # Get JSON string
        json_str = memory.to_json()
    """
    # The VAR_DELETED sentinel is not JSON-serializable, so deletion
    # entries in the variable history are written as a marker string.
    json_safe_vars = {
        var_name: [
            [stamp, '__VAR_DELETED__' if value is VAR_DELETED else value]
            for stamp, value in history
        ]
        for var_name, history in self.vars.items()
    }

    payload = {
        'version': '1.0',
        'id': self.id,
        'events': self.events,
        'objects': self.objects,
        'vars': json_safe_vars,
        'var_desc_history': self.var_desc_history,
        'idx_msgs': self.idx_msgs,
        'idx_refs': self.idx_refs,
        'idx_logs': self.idx_logs,
        'idx_vars': self.idx_vars,
        'idx_all': self.idx_all,
    }
    text = json.dumps(payload, indent=indent, ensure_ascii=False)

    if not filename:
        return text

    with open(filename, 'w', encoding='utf-8') as fh:
        fh.write(text)
    return None
1019
+
1020
@classmethod
def from_json(cls, source):
    """
    Create a MEMORY instance from JSON produced by to_json().

    Args:
        source: JSON string or filename path. If `source` names an
            existing file it is read as JSON; otherwise it is parsed as
            a JSON string.

    Returns:
        New MEMORY instance

    Example:
        # Load from file
        memory = MEMORY.from_json('memory_backup.json')

        # Load from JSON string
        memory = MEMORY.from_json(json_str)
    """
    import os

    if os.path.isfile(source):
        with open(source, 'r', encoding='utf-8') as fh:
            raw = json.load(fh)
    else:
        raw = json.loads(source)

    # Turn the JSON deletion marker back into the VAR_DELETED sentinel.
    restored_vars = {
        var_name: [
            [stamp, VAR_DELETED if value == '__VAR_DELETED__' else value]
            for stamp, value in history
        ]
        for var_name, history in raw.get('vars', {}).items()
    }

    mem = cls()
    mem.id = raw.get('id', mem.id)
    mem.events = raw.get('events', {})
    mem.objects = raw.get('objects', {})
    mem.vars = restored_vars
    mem.var_desc_history = raw.get('var_desc_history', {})
    # Indexes are taken as saved; absent ones default to empty lists.
    for index_name in ('idx_msgs', 'idx_refs', 'idx_logs', 'idx_vars', 'idx_all'):
        setattr(mem, index_name, raw.get(index_name, []))
    return mem
1078
+
1079
@classmethod
def from_events(cls, event_list, memory_id=None, objects=None):
    """
    Rehydrate a MEMORY instance from a list of events.
    This is the inverse of snapshot - enables cloud sync.

    Events are replayed in chronological order: primarily by their
    'dt_utc' timestamp and, for events sharing the same (millisecond
    resolution) timestamp, by stamp, which sorts alphabetically in
    chronological order. This makes the result independent of the order
    of `event_list`. Events without a stamp are ignored.

    Args:
        event_list: List of event dicts (order doesn't matter, will be sorted)
        memory_id: Optional ID for the memory instance
        objects: Optional dict of objects (stamp -> compressed object dict)

    Returns:
        New MEMORY instance with all events loaded
    """
    mem = cls()
    if memory_id:
        mem.id = memory_id

    # Restore objects if provided (copied so the caller's dict is not shared).
    if objects:
        mem.objects = dict(objects)

    def chrono_key(ev):
        # A missing or None timestamp sorts first instead of raising
        # TypeError when compared with string timestamps. The stamp
        # breaks ties between events recorded in the same millisecond.
        return (ev.get('dt_utc') or '', ev.get('stamp'))

    # Drop stampless events before sorting so the key is always comparable.
    replayable = [ev for ev in event_list if ev.get('stamp')]

    typed_indexes = {
        'msg': mem.idx_msgs,
        'ref': mem.idx_refs,
        'log': mem.idx_logs,
        'var': mem.idx_vars,
    }

    for ev in sorted(replayable, key=chrono_key):
        stamp = ev['stamp']
        event_type = ev.get('type', 'msg')

        # Store in data layer
        mem.events[stamp] = ev

        # Indexes hold [timestamp, stamp] pairs; appending keeps them
        # ordered because the events are already sorted.
        ts_pair = [ev.get('dt_utc') or '', stamp]
        typed_index = typed_indexes.get(event_type)
        if typed_index is not None:
            typed_index.append(ts_pair)

        if event_type == 'var':
            var_name = ev.get('var_name')
            if var_name:
                # Replay the variable state into its history list,
                # using the sentinel for deletion events.
                value = VAR_DELETED if ev.get('var_deleted', False) else ev.get('var_value')
                mem.vars.setdefault(var_name, []).append([stamp, value])

                # Rebuild the description history; every var event carries
                # the current description, so only record actual changes.
                var_desc = ev.get('var_desc')
                if var_desc:
                    desc_hist = mem.var_desc_history.setdefault(var_name, [])
                    if not desc_hist or desc_hist[-1][1] != var_desc:
                        desc_hist.append([stamp, var_desc])

        # Every event, whatever its type, goes into the master index.
        mem.idx_all.append(ts_pair)

    return mem
1156
+
1157
+ #---
1158
+
1159
+ # The render method provides a flexible way to display or export the MEMORY's messages or events.
1160
+ # It supports event type selection, output format, advanced filtering, metadata inclusion, pretty-printing, and message condensing.
1161
def render(
    self,
    include=('msgs',),              # Tuple/list of event types to include: 'msgs', 'logs', 'refs', 'vars', 'events'
    output_format='plain',          # 'plain', 'markdown', 'json', 'table', 'conversation'
    role_filter=None,               # List of roles to include (None = all)
    mode_filter=None,               # List of modes to include (None = all)
    channel_filter=None,            # Channel to filter by (None = all)
    content_filter=None,            # String or list of keywords to filter content (None = all)
    include_metadata=True,          # Whether to include metadata (timestamps, roles, etc.)
    pretty=True,                    # Pretty-print for human readability
    max_length=None,                # Max total length of output (int, None = unlimited)
    condense_msg=True,              # If True, snip/condense messages that exceed max_length
    time_range=None,                # Tuple (start_dt, end_dt) to filter by datetime (None = all)
    event_limit=None,               # Max number of events to include (None = all)
    # Conversation/LLM-optimized options:
    max_message_length=1000,        # Max length per individual message (for 'conversation' format)
    max_total_length=8000,          # Max total length of the entire conversation (for 'conversation' format)
    include_roles=('user', 'assistant'),  # Which roles to include (for 'conversation' format)
    message_separator="\n\n",       # Separator between messages (for 'conversation' format)
    role_prefix=True,               # Whether to include role prefixes like "User:" and "Assistant:" (for 'conversation' format)
    truncate_indicator="...",       # What to show when content is truncated (for 'conversation' format)
):
    """
    Render MEMORY contents with flexible filtering and formatting.

    Events are gathered from the indexes selected by `include`, filtered,
    sorted by stamp (alphabetical = chronological), optionally limited to
    the most recent `event_limit`, and then formatted.

    Args:
        include: Which event types to include ('msgs', 'logs', 'refs', 'vars',
            'events'). 'events' selects everything from the master index.
            A single string is accepted as shorthand for a one-element tuple.
        output_format: 'plain', 'markdown', 'json', 'table', or 'conversation'.
            'markdown' currently produces the same text as 'plain'.
        role_filter: List of roles to include (None = all). Events without
            a role are matched on their type instead.
        mode_filter: List of modes to include (None = all)
        channel_filter: Channel to filter by (None = all)
        content_filter: String or list of keywords; matching is
            case-insensitive, and non-string content is compared by its
            text form (None counts as empty)
        include_metadata: Whether to include metadata (timestamps, roles, etc.).
            Not applied to the 'table' format.
        pretty: Pretty-print for human readability
        max_length: Max total length of output ('plain', 'markdown', 'json')
        condense_msg: If True, snip the line that exceeds max_length;
            if False, drop it
        time_range: Tuple (start_dt, end_dt) to filter by datetime (None = all).
            Events whose timestamp cannot be parsed or compared are kept.
        event_limit: Max number of events to include (None = all)
        max_message_length: Max length per message (for 'conversation' format)
        max_total_length: Max total length (for 'conversation' format)
        include_roles: Which roles to include (for 'conversation' format)
        message_separator: Separator between messages (for 'conversation' format)
        role_prefix: Whether to include role prefixes (for 'conversation' format)
        truncate_indicator: Indicator for truncated content (for 'conversation' format)

    Returns:
        str: Rendered output in the specified format.

    Raises:
        ValueError: If output_format is not one of the supported values.

    Example usage:
        mem = MEMORY()
        mem.add_msg('user', 'Hello!')
        mem.add_msg('assistant', 'Hi there!')
        print(mem.render())  # Default: plain text, all messages

        # Render only user messages in markdown
        print(mem.render(role_filter=['user'], output_format='markdown'))

        # Render as a table, including logs and refs
        print(mem.render(include=('msgs', 'logs', 'refs'), output_format='table'))

        # Render with a content keyword filter and max length
        print(mem.render(content_filter='hello', max_length=50))

        # Export as LLM-optimized conversation
        print(mem.render(output_format='conversation', max_total_length=2000))

        # Filter by channel
        print(mem.render(channel_filter='telegram'))
    """
    from datetime import datetime

    def as_text(value):
        # Content is not guaranteed to be a string; normalise it so that
        # .lower() / len() / slicing below never fail.
        if isinstance(value, str):
            return value
        return '' if value is None else str(value)

    # A bare string would otherwise be split into characters by set().
    if isinstance(include, str):
        include = (include,)
    include_set = set(include)

    def gather_events():
        # 'events' means everything, straight from the master index.
        if 'events' in include_set:
            return self._get_events_from_index(self.idx_all, -1)
        gathered = []
        for kind, index in (
            ('msgs', self.idx_msgs),
            ('logs', self.idx_logs),
            ('refs', self.idx_refs),
            ('vars', self.idx_vars),
        ):
            if kind in include_set:
                gathered.extend(self._get_events_from_index(index, -1))
        return gathered

    def passes_filters(ev):
        if role_filter:
            ev_role = ev.get('role') or ev.get('type')
            if ev_role not in role_filter:
                return False
        if mode_filter and ev.get('mode') not in mode_filter:
            return False
        if channel_filter and ev.get('channel') != channel_filter:
            return False
        if content_filter:
            haystack = as_text(ev.get('content', '')).lower()
            if isinstance(content_filter, str):
                if content_filter.lower() not in haystack:
                    return False
            elif not any(kw.lower() in haystack for kw in content_filter):
                return False
        if time_range:
            dt_str = ev.get('dt_utc') or ev.get('dt_bog')
            if dt_str:
                try:
                    dt = datetime.fromisoformat(dt_str)
                    start, end = time_range
                    if (start and dt < start) or (end and dt > end):
                        return False
                except (ValueError, TypeError):
                    # Best effort: an unparseable timestamp or a
                    # naive/aware mismatch keeps the event.
                    pass
        return True

    def describe_var(ev):
        return "{} = {}".format(ev.get('var_name', '?'), ev.get('var_value', '?'))

    # Step 1: gather, filter, order and limit the events.
    events = [ev for ev in gather_events() if passes_filters(ev)]
    events = sorted(events, key=lambda ev: ev.get('stamp', ''))
    if event_limit:
        events = events[-event_limit:]  # Most recent N

    # --- Conversation/LLM-optimized format ---
    if output_format == 'conversation':
        parts = []
        used = 0
        for msg in events:
            if msg.get('role') not in include_roles:
                continue
            role = msg.get('role', 'unknown')
            content = as_text(msg.get('content', ''))

            # Truncate an individual message if needed; the clamp keeps
            # the slice bound from going negative.
            if len(content) > max_message_length:
                keep = max(0, max_message_length - len(truncate_indicator))
                content = content[:keep] + truncate_indicator

            label = role.title() + ": " if role_prefix else ""
            formatted = label + content

            # Would this message overflow the total budget?
            cost = len(formatted) + len(message_separator)
            if used + cost > max_total_length:
                # Fit a shortened version only if a reasonable amount
                # of room is left, then stop.
                room = max_total_length - used - len(truncate_indicator)
                if room > 50:
                    parts.append(label + content[:room - len(label)] + truncate_indicator)
                break

            parts.append(formatted)
            used += cost

        return message_separator.join(parts)

    snip_notice = " [snipped]"  # Appended where output was cut

    # --- JSON format ---
    if output_format == 'json':
        if include_metadata:
            out_events = events
        else:
            kept_keys = ('role', 'content', 'type', 'channel')
            out_events = [
                {k: v for k, v in ev.items() if k in kept_keys}
                for ev in events
            ]
        output = json.dumps(out_events, indent=2 if pretty else None, default=str)
        if max_length and len(output) > max_length:
            output = output[:max_length] + snip_notice
        return output

    # --- Table format (tab-separated, one row per event) ---
    if output_format == 'table':
        rows = ["Type\tContent\tDatetime\tStamp\tChannel"]
        for ev in events:
            typ = ev.get('type', ev.get('role', ''))
            content = describe_var(ev) if typ == 'var' else ev.get('content', '')
            rows.append("{}\t{}\t{}\t{}\t{}".format(
                typ,
                content,
                ev.get('dt_utc') or ev.get('dt_bog') or '',
                ev.get('stamp', ''),
                ev.get('channel', ''),
            ))
        return "\n".join(rows)

    # --- Plain / markdown format (one line per event) ---
    if output_format in ('plain', 'markdown'):
        lines = []
        total_length = 0
        for ev in events:
            event_type = ev.get('type', 'msg')
            if event_type == 'log' or ev.get('role') == 'logger':
                prefix, content = "[LOG]", ev.get('content', '')
            elif event_type == 'ref':
                prefix, content = "[REF]", ev.get('content', '')
            elif event_type == 'var':
                prefix, content = "[VAR]", describe_var(ev)
            else:
                role = ev.get('role', 'MSG')
                if role is None:
                    role = 'MSG'
                prefix, content = "[{}]".format(str(role).upper()), ev.get('content', '')

            meta = ""
            if include_metadata:
                dt = ev.get('dt_utc') or ev.get('dt_bog')
                meta = " ({})".format(dt) if dt else ""

            line = "{} {}{}".format(prefix, content, meta)
            if max_length and total_length + len(line) > max_length:
                if condense_msg:
                    # Snip the line to whatever room is left.
                    allowed = max_length - total_length - len(snip_notice)
                    lines.append(line[:allowed] + snip_notice if allowed > 0 else snip_notice)
                break
            lines.append(line)
            total_length += len(line) + 1  # +1 for newline

        return ("\n" if pretty else " ").join(lines)

    raise ValueError("Unknown output_format: {}".format(output_format))
1448
+
1449
+
1450
+ MemoryManipulationExamples = """
1451
+
1452
+ MEMORY Class Usage Tutorial
1453
+ ===========================
1454
+
1455
+ This tutorial demonstrates common workflows and transactions using the MEMORY class.
1456
+ The MEMORY class is an event-sourced state container for managing messages, logs,
1457
+ reflections, and variables in agentic or conversational systems.
1458
+
1459
+ Key Features:
1460
+ - Everything is an event with a sortable ID (alphabetical = chronological)
1461
+ - Events stored in a dictionary for O(1) lookup
1462
+ - Channel tracking for messages (omni-directional communication)
1463
+ - Full variable history with timestamps
1464
+ - Memory can be rehydrated from event list for cloud sync
1465
+
1466
+ ------------------------------------------------------------
1467
+ 1. Initialization
1468
+ ------------------------------------------------------------
1469
+
1470
+ >>> mem = MEMORY()
1471
+
1472
+ Creates a new MEMORY instance with empty event stores and indexes.
1473
+
1474
+ ------------------------------------------------------------
1475
+ 2. Adding and Retrieving Messages with Channel Support
1476
+ ------------------------------------------------------------
1477
+
1478
+ # Add user and assistant messages with channel tracking
1479
+ >>> mem.add_msg('user', 'Hello, assistant!', channel='webapp')
1480
+ >>> mem.add_msg('assistant', 'Hello, user! How can I help you?', channel='webapp')
1481
+
1482
+ # Messages from different channels
1483
+ >>> mem.add_msg('user', 'Quick question via phone', channel='ios')
1484
+ >>> mem.add_msg('user', 'Following up on Telegram', channel='telegram')
1485
+
1486
+ # Retrieve all messages as a list of dicts
1487
+ >>> mem.get_msgs()
1488
+ [{'role': 'user', 'content': 'Hello, assistant!', 'channel': 'webapp', ...}, ...]
1489
+
1490
+ # Filter messages by channel
1491
+ >>> mem.get_msgs(channel='telegram')
1492
+
1493
+ # Retrieve only user messages as a string
1494
+ >>> mem.get_msgs(include=['user'], repr='str')
1495
+ 'user: Hello, assistant!'
1496
+
1497
+ # Get the last assistant message
1498
+ >>> mem.last_asst_msg()
1499
+ 'Hello, user! How can I help you?'
1500
+
1501
+ ------------------------------------------------------------
1502
+ 3. Logging and Reflections
1503
+ ------------------------------------------------------------
1504
+
1505
+ # Add a log entry
1506
+ >>> mem.add_log('System initialized.')
1507
+
1508
+ # Add a reflection (agent's internal reasoning)
1509
+ >>> mem.add_ref('User seems to be asking about weather patterns.')
1510
+
1511
+ # Retrieve the last log message
1512
+ >>> mem.last_log_msg()
1513
+ 'System initialized.'
1514
+
1515
+ # Get all logs
1516
+ >>> mem.get_logs()
1517
+
1518
+ # Get all reflections
1519
+ >>> mem.get_refs()
1520
+
1521
+ ------------------------------------------------------------
1522
+ 4. Managing Variables (Full History Tracking)
1523
+ ------------------------------------------------------------
1524
+
1525
+ # Set a variable with a description (logged as an event!)
1526
+ >>> mem.set_var('session_id', 'abc123', desc='Current session identifier')
1527
+
1528
+ # Update the variable (appends to history, doesn't overwrite)
1529
+ >>> mem.set_var('session_id', 'xyz789')
1530
+
1531
+ # Retrieve the current value of a variable
1532
+ >>> mem.get_var('session_id')
1533
+ 'xyz789'
1534
+
1535
+ # Get all current non-deleted variables as a dict
1536
+ >>> mem.get_all_vars()
1537
+ {'session_id': 'xyz789'}
1538
+
1539
+ # Get full variable history as list of [stamp, value] pairs
1540
+ >>> mem.get_var_history('session_id')
1541
+ [['stamp1...', 'abc123'], ['stamp2...', 'xyz789']]
1542
+
1543
+ # Get variable description
1544
+ >>> mem.get_var_desc('session_id')
1545
+ 'Current session identifier'
1546
+
1547
+ # Delete a variable (marks as deleted but preserves history)
1548
+ >>> mem.del_var('session_id')
1549
+
1550
+ # After deletion, get_var returns None
1551
+ >>> mem.get_var('session_id')
1552
+ None
1553
+
1554
+ # Check if a variable is deleted
1555
+ >>> mem.is_var_deleted('session_id')
1556
+ True
1557
+
1558
+ # History still shows all changes including deletion
1559
+ >>> mem.get_var_history('session_id')
1560
+ [['stamp1...', 'abc123'], ['stamp2...', 'xyz789'], ['stamp3...', <DELETED>]]
1561
+
1562
+ # Variable can be re-set after deletion
1563
+ >>> mem.set_var('session_id', 'new_value')
1564
+ >>> mem.get_var('session_id')
1565
+ 'new_value'
1566
+
1567
+ ------------------------------------------------------------
1568
+ 5. Saving, Loading, and Copying State
1569
+ ------------------------------------------------------------
1570
+
1571
+ # Save MEMORY state to a file
1572
+ >>> mem.save('memory_state.pkl')
1573
+
1574
+ # Save with compression
1575
+ >>> mem.save('memory_state.pkl.gz', compressed=True)
1576
+
1577
+ # Load MEMORY state from a file (rehydrates from events)
1578
+ >>> mem2 = MEMORY()
1579
+ >>> mem2.load('memory_state.pkl')
1580
+
1581
+ # Deep copy the MEMORY object
1582
+ >>> mem3 = mem.copy()
1583
+
1584
+ ------------------------------------------------------------
1585
+ 6. Rehydrating from Events (Cloud Sync Ready)
1586
+ ------------------------------------------------------------
1587
+
1588
+ # Export all events
1589
+ >>> events = mem.get_events()
1590
+
1591
+ # Create a new memory from events (order doesn't matter; events are sorted by timestamp)
1592
+ >>> mem_copy = MEMORY.from_events(events)
1593
+
1594
+ # Export snapshot for cloud storage
1595
+ >>> snapshot = mem.snapshot()
1596
+ # snapshot = {'id': '...', 'events': {...}, 'objects': {...}}
1597
+
1598
+ ------------------------------------------------------------
1599
+ 7. Rendering and Exporting Memory Contents
1600
+ ------------------------------------------------------------
1601
+
1602
+ # Render all messages as plain text (default)
1603
+ >>> print(mem.render())
1604
+
1605
+ # Render only user messages in markdown format
1606
+ >>> print(mem.render(role_filter=['user'], output_format='markdown'))
1607
+
1608
+ # Render as a table, including logs and reflections
1609
+ >>> print(mem.render(include=('msgs', 'logs', 'refs'), output_format='table'))
1610
+
1611
+ # Filter by channel
1612
+ >>> print(mem.render(channel_filter='telegram'))
1613
+
1614
+ # Render with a content keyword filter and max length
1615
+ >>> print(mem.render(content_filter='hello', max_length=50))
1616
+
1617
+ # Export as LLM-optimized conversation (for prompt construction)
1618
+ >>> print(mem.render(output_format='conversation', max_total_length=2000))
1619
+
1620
+ ------------------------------------------------------------
1621
+ 8. Advanced Filtering and Formatting
1622
+ ------------------------------------------------------------
1623
+
1624
+ # Filter by role, mode, and channel
1625
+ >>> print(mem.render(role_filter=['assistant'], mode_filter=['text'], channel_filter='webapp'))
1626
+
1627
+ # Filter by time range (using datetime objects)
1628
+ >>> from datetime import datetime, timedelta
1629
+ >>> start = datetime.utcnow() - timedelta(hours=1)
1630
+ >>> end = datetime.utcnow()
1631
+ >>> print(mem.render(time_range=(start, end)))
1632
+
1633
+ # Limit number of events/messages
1634
+ >>> print(mem.render(event_limit=5))
1635
+
1636
+ # Get all events of specific types
1637
+ >>> mem.get_events(event_types=['msg', 'ref'])
1638
+
1639
+ ------------------------------------------------------------
1640
+ 9. Example: Full Workflow
1641
+ ------------------------------------------------------------
1642
+
1643
+ >>> mem = MEMORY()
1644
+ >>> mem.add_msg('user', 'What is the weather today?', channel='webapp')
1645
+ >>> mem.add_msg('assistant', 'The weather is sunny and warm.', channel='webapp')
1646
+ >>> mem.set_var('weather', 'sunny and warm', desc='Latest weather info')
1647
+ >>> mem.add_ref('User is interested in outdoor activities.')
1648
+ >>> mem.add_log('Weather query processed successfully.')
1649
+ >>> print(mem.render(output_format='conversation'))
1650
+
1651
+ # Export all events and rehydrate
1652
+ >>> all_events = mem.get_events()
1653
+ >>> mem_restored = MEMORY.from_events(all_events, mem.id)
1654
+
1655
+ ------------------------------------------------------------
1656
+ For more details, see the MEMORY class docstring and method documentation.
1657
+ ------------------------------------------------------------
1658
+ """