universal_agent_context-0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- uacs/__init__.py +12 -0
- uacs/adapters/__init__.py +19 -0
- uacs/adapters/agent_skill_adapter.py +202 -0
- uacs/adapters/agents_md_adapter.py +330 -0
- uacs/adapters/base.py +261 -0
- uacs/adapters/clinerules_adapter.py +39 -0
- uacs/adapters/cursorrules_adapter.py +39 -0
- uacs/api.py +262 -0
- uacs/cli/__init__.py +6 -0
- uacs/cli/context.py +349 -0
- uacs/cli/main.py +195 -0
- uacs/cli/mcp.py +115 -0
- uacs/cli/memory.py +142 -0
- uacs/cli/packages.py +309 -0
- uacs/cli/skills.py +144 -0
- uacs/cli/utils.py +24 -0
- uacs/config/repositories.yaml +26 -0
- uacs/context/__init__.py +0 -0
- uacs/context/agent_context.py +406 -0
- uacs/context/shared_context.py +661 -0
- uacs/context/unified_context.py +332 -0
- uacs/mcp_server_entry.py +80 -0
- uacs/memory/__init__.py +5 -0
- uacs/memory/simple_memory.py +255 -0
- uacs/packages/__init__.py +26 -0
- uacs/packages/manager.py +413 -0
- uacs/packages/models.py +60 -0
- uacs/packages/sources.py +270 -0
- uacs/protocols/__init__.py +5 -0
- uacs/protocols/mcp/__init__.py +8 -0
- uacs/protocols/mcp/manager.py +77 -0
- uacs/protocols/mcp/skills_server.py +700 -0
- uacs/skills_validator.py +367 -0
- uacs/utils/__init__.py +5 -0
- uacs/utils/paths.py +24 -0
- uacs/visualization/README.md +132 -0
- uacs/visualization/__init__.py +36 -0
- uacs/visualization/models.py +195 -0
- uacs/visualization/static/index.html +857 -0
- uacs/visualization/storage.py +402 -0
- uacs/visualization/visualization.py +328 -0
- uacs/visualization/web_server.py +364 -0
- universal_agent_context-0.2.0.dist-info/METADATA +873 -0
- universal_agent_context-0.2.0.dist-info/RECORD +47 -0
- universal_agent_context-0.2.0.dist-info/WHEEL +4 -0
- universal_agent_context-0.2.0.dist-info/entry_points.txt +2 -0
- universal_agent_context-0.2.0.dist-info/licenses/LICENSE +21 -0
uacs/context/shared_context.py
@@ -0,0 +1,661 @@
"""Shared context management with compression for multi-agent communication.

This module provides token-efficient context sharing between agents using:
1. Semantic compression (summarization)
2. Deduplication
3. Reference-based storage
4. Progressive context building
"""

import hashlib
import json
import logging
import uuid
import zlib
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Any

try:
    import tiktoken

    TIKTOKEN_AVAILABLE = True
except ImportError:
    TIKTOKEN_AVAILABLE = False

logger = logging.getLogger(__name__)


@dataclass
class ContextEntry:
    """A single context entry with metadata."""

    id: str
    content: str
    compressed: bytes
    agent: str
    timestamp: str
    references: list[str]
    token_estimate: int
    hash: str
    quality: float = 1.0  # Quality score 0-1
    metadata: dict[str, Any] = None  # Additional metadata
    topics: list[str] = None  # Optional topic tags for focused retrieval

    def __post_init__(self):
        """Initialize default metadata and topics."""
        if self.metadata is None:
            self.metadata = {}
        if self.topics is None:
            self.topics = []


@dataclass
class ContextSummary:
    """A compressed summary of multiple context entries."""

    id: str
    summary: str
    entry_ids: list[str]
    token_savings: int
    created: str


class SharedContextManager:
    """Manages shared context between agents with compression."""

    def __init__(self, storage_path: Path | None = None):
        """Initialize context manager.

        Args:
            storage_path: Path to store context data
        """
        self.storage_path = storage_path or Path(".state/context")
        self.storage_path.mkdir(parents=True, exist_ok=True)

        self.entries: dict[str, ContextEntry] = {}
        self.summaries: dict[str, ContextSummary] = {}
        self.dedup_index: dict[str, str] = {}  # hash -> entry_id

        # Initialize token encoder
        if TIKTOKEN_AVAILABLE:
            self.encoder = tiktoken.get_encoding("cl100k_base")
        else:
            self.encoder = None

        self._load_context()

    def add_context(
        self, key: str, content: str, metadata: dict[str, Any] | None = None
    ) -> str:
        """Alias for add_entry with key parameter.

        Args:
            key: Context key (used as agent name)
            content: Content to store
            metadata: Optional metadata

        Returns:
            Entry ID
        """
        return self.add_entry(content, agent=key, metadata=metadata)

    def add_entry(
        self,
        content: str,
        agent: str,
        references: list[str] | None = None,
        metadata: dict[str, Any] | None = None,
        topics: list[str] | None = None,
    ) -> str:
        """Add context entry with automatic compression.

        Args:
            content: Context content
            agent: Agent that created this context
            references: IDs of referenced entries
            metadata: Optional additional metadata
            topics: Optional topic tags for focused retrieval

        Returns:
            Entry ID
        """
        # Check for duplicates
        content_hash = self._hash_content(content)
        if content_hash in self.dedup_index:
            return self.dedup_index[content_hash]

        # Create entry
        entry_id = self._generate_id()
        compressed = zlib.compress(content.encode("utf-8"))
        tokens = self.count_tokens(content)
        quality = self._calculate_quality(content)

        entry = ContextEntry(
            id=entry_id,
            content=content,
            compressed=compressed,
            agent=agent,
            timestamp=datetime.now().isoformat(),
            references=references or [],
            token_estimate=tokens,
            hash=content_hash,
            quality=quality,
            metadata=metadata or {},
            topics=topics or [],
        )

        self.entries[entry_id] = entry
        self.dedup_index[content_hash] = entry_id

        # Auto-compress if context is getting large
        if len(self.entries) > 10:
            self._auto_compress()

        self._save_entry(entry)

        return entry_id

    def get_entry(self, entry_id: str) -> str | None:
        """Get context entry by ID.

        Args:
            entry_id: Entry ID

        Returns:
            Entry content or None
        """
        entry = self.entries.get(entry_id)
        return entry.content if entry else None

    def get_compressed_context(
        self,
        agent: str | None = None,
        max_tokens: int = 4000,
        min_quality: float = 0.7,
    ) -> str:
        """Get compressed context suitable for agent prompts.

        Args:
            agent: Filter by agent (None for all)
            max_tokens: Maximum tokens to return
            min_quality: Minimum quality score (0-1)

        Returns:
            Compressed context string
        """
        # Collect relevant entries
        entries = [
            e
            for e in self.entries.values()
            if (agent is None or e.agent == agent) and e.quality >= min_quality
        ]

        # Sort by weighted combination of quality (70%) and recency (30%)
        entries.sort(
            key=lambda e: (
                e.quality * 0.7 + self._recency_score(e.timestamp) * 0.3,
                e.timestamp,
            ),
            reverse=True,
        )

        # Build context within token budget
        context_parts = []
        token_count = 0

        for entry in entries:
            if token_count + entry.token_estimate > max_tokens:
                break

            context_parts.append(f"[{entry.agent}] {entry.content}")
            token_count += entry.token_estimate

        # Include summaries if available
        for summary in self.summaries.values():
            summary_tokens = self._estimate_tokens(summary.summary)
            if token_count + summary_tokens <= max_tokens:
                context_parts.append(f"[Summary] {summary.summary}")
                token_count += summary_tokens

        return "\n\n".join(context_parts)

    def get_focused_context(
        self,
        topics: list[str] | None = None,
        agent: str | None = None,
        max_tokens: int = 4000,
        min_quality: float = 0.7,
    ) -> str:
        """Get focused context filtered by topics with fallback.

        Args:
            topics: List of topics to prioritize (None for all)
            agent: Filter by agent (None for all)
            max_tokens: Maximum tokens to return
            min_quality: Minimum quality score (0-1)

        Returns:
            Focused context string with topic-matched entries prioritized
        """
        # Collect relevant entries
        all_entries = [
            e
            for e in self.entries.values()
            if (agent is None or e.agent == agent) and e.quality >= min_quality
        ]

        if not topics:
            # No topics specified, use standard compressed context
            return self.get_compressed_context(
                agent=agent, max_tokens=max_tokens, min_quality=min_quality
            )

        # Separate entries by topic matching with boosted quality for multi-topic matches
        topic_set = set(topics)
        matching_entries = []
        fallback_entries = []

        for entry in all_entries:
            entry_topics = set(entry.topics) if entry.topics else set()
            matches = len(entry_topics & topic_set)  # Count topic matches
            if matches > 0:
                # Boost quality based on number of matching topics (20% per match, capped at 1.0)
                boosted_quality = min(entry.quality * (1 + 0.2 * matches), 1.0)
                matching_entries.append((entry, boosted_quality))
            else:
                fallback_entries.append(entry)

        # Sort matching entries by boosted quality (descending) then recency
        matching_entries.sort(key=lambda x: (x[1], x[0].timestamp), reverse=True)
        fallback_entries.sort(key=lambda e: (e.quality, e.timestamp), reverse=True)

        # Build context within token budget, prioritizing matching entries
        context_parts = []
        token_count = 0

        # Add matching entries first (now with boosted quality)
        for entry, boosted_quality in matching_entries:
            if token_count + entry.token_estimate > max_tokens:
                break

            topics_str = f" [topics: {', '.join(entry.topics)}]" if entry.topics else ""
            context_parts.append(f"[{entry.agent}]{topics_str} {entry.content}")
            token_count += entry.token_estimate

        # Add fallback entries if token budget allows
        for entry in fallback_entries:
            if token_count + entry.token_estimate > max_tokens:
                break

            topics_str = f" [topics: {', '.join(entry.topics)}]" if entry.topics else ""
            context_parts.append(f"[{entry.agent}]{topics_str} {entry.content}")
            token_count += entry.token_estimate

        # Include summaries if available and budget allows
        for summary in self.summaries.values():
            summary_tokens = self._estimate_tokens(summary.summary)
            if token_count + summary_tokens <= max_tokens:
                context_parts.append(f"[Summary] {summary.summary}")
                token_count += summary_tokens

        return "\n\n".join(context_parts)

    def create_summary(self, entry_ids: list[str], summary_content: str) -> str:
        """Create a summary of multiple entries.

        Args:
            entry_ids: IDs of entries to summarize
            summary_content: Summary text

        Returns:
            Summary ID
        """
        summary_id = self._generate_id()

        # Calculate token savings
        original_tokens = sum(
            self.entries[eid].token_estimate for eid in entry_ids if eid in self.entries
        )
        summary_tokens = self._estimate_tokens(summary_content)

        summary = ContextSummary(
            id=summary_id,
            summary=summary_content,
            entry_ids=entry_ids,
            token_savings=original_tokens - summary_tokens,
            created=datetime.now().isoformat(),
        )

        self.summaries[summary_id] = summary
        self._save_summary(summary)

        # Remove original entries to save space
        for eid in entry_ids:
            if eid in self.entries:
                del self.entries[eid]

        return summary_id

    def _auto_compress(self):
        """Automatically compress old context entries."""
        # Group old entries by agent
        old_entries = sorted(self.entries.values(), key=lambda e: e.timestamp)[
            :5
        ]  # Compress oldest 5

        if len(old_entries) < 3:
            return

        # Create summary
        entry_ids = [e.id for e in old_entries]
        summary_content = self._create_auto_summary(old_entries)

        self.create_summary(entry_ids, summary_content)

    def _create_auto_summary(self, entries: list[ContextEntry]) -> str:
        """Create automatic summary of entries.

        Args:
            entries: Entries to summarize

        Returns:
            Summary text
        """
        # Simple extractive summary (in production, use LLM)
        summaries = []

        for entry in entries:
            # Extract first sentence or first 100 chars
            content = entry.content
            first_sentence = content.split(".")[0][:100]
            summaries.append(f"[{entry.agent}]: {first_sentence}...")

        return " | ".join(summaries)

    def get_context_graph(self) -> dict[str, Any]:
        """Get context relationships as graph structure.

        Returns:
            Graph structure for visualization
        """
        nodes = []
        edges = []

        # Add entry nodes
        for entry in self.entries.values():
            nodes.append(
                {
                    "id": entry.id,
                    "type": "entry",
                    "agent": entry.agent,
                    "tokens": entry.token_estimate,
                    "timestamp": entry.timestamp,
                }
            )

            # Add reference edges
            for ref_id in entry.references:
                edges.append(
                    {"source": entry.id, "target": ref_id, "type": "reference"}
                )

        # Add summary nodes
        for summary in self.summaries.values():
            nodes.append(
                {
                    "id": summary.id,
                    "type": "summary",
                    "tokens_saved": summary.token_savings,
                    "entry_count": len(summary.entry_ids),
                }
            )

            # Add summary edges
            for entry_id in summary.entry_ids:
                edges.append(
                    {"source": summary.id, "target": entry_id, "type": "summarizes"}
                )

        return {"nodes": nodes, "edges": edges, "stats": self.get_stats()}

    def get_stats(self) -> dict[str, Any]:
        """Get context statistics.

        Returns:
            Statistics dictionary
        """
        total_tokens = sum(e.token_estimate for e in self.entries.values())
        total_saved = sum(s.token_savings for s in self.summaries.values())

        # Calculate quality statistics
        qualities = [e.quality for e in self.entries.values()]
        avg_quality = sum(qualities) / len(qualities) if qualities else 0
        high_quality_count = len([q for q in qualities if q >= 0.7])

        return {
            "entry_count": len(self.entries),
            "summary_count": len(self.summaries),
            "total_tokens": total_tokens,
            "tokens_saved": total_saved,
            "compression_ratio": f"{(total_saved / (total_tokens + total_saved) * 100):.1f}%"
            if total_tokens + total_saved > 0
            else "0%",
            "storage_size_mb": sum(len(e.compressed) for e in self.entries.values())
            / (1024 * 1024),
            "avg_quality": f"{avg_quality:.2f}",
            "high_quality_entries": high_quality_count,
            "low_quality_entries": len(qualities) - high_quality_count,
        }

    def count_tokens(self, text: str) -> int:
        """Count tokens in text using tiktoken.

        Args:
            text: Text to count

        Returns:
            Token count
        """
        if self.encoder:
            return len(self.encoder.encode(text))
        # Fallback: rough estimate
        return len(text) // 4

    def _recency_score(self, timestamp_str: str) -> float:
        """Calculate recency bonus based on entry age.

        Args:
            timestamp_str: ISO format timestamp string

        Returns:
            Recency score (1.0 = now, 0.0 = 24h+ ago)
        """
        from datetime import datetime

        try:
            from dateutil.parser import parse

            timestamp = parse(timestamp_str)
        except (ImportError, ValueError):
            # Fallback: assume recent if parsing fails
            return 0.5

        age_hours = (datetime.now(timestamp.tzinfo) - timestamp).total_seconds() / 3600
        # Linear decay: 1.0 at 0 hours, 0.0 at 24+ hours
        return max(0.0, 1.0 - (age_hours / 24))

    def _calculate_quality(self, content: str) -> float:
        """Calculate content quality score (0-1).

        Args:
            content: Content to score

        Returns:
            Quality score between 0 and 1
        """
        score = 1.0
        content_lower = content.lower()

        # Penalize very short content
        if len(content) < 50:
            score *= 0.5

        # Penalize error messages (but they can still be important)
        if "error" in content_lower or "failed" in content_lower:
            score *= 0.7

        # Reward longer, detailed content
        if len(content) > 200:
            score *= 1.2
        if len(content) > 500:
            score *= 1.3

        # Reward code blocks (indicates technical content)
        if "```" in content:
            score *= 1.3

        # Reward content with questions (important for context)
        if "?" in content and len(content) > 100:
            score *= 1.2

        # Penalize very generic responses
        generic_phrases = ["you're welcome", "let me know", "happy to help"]
        if any(phrase in content_lower for phrase in generic_phrases):
            score *= 0.6

        # Reward technical content (has specific terms)
        technical_indicators = ["function", "class", "method", "error", "bug", "fix", "implement"]
        if any(term in content_lower for term in technical_indicators):
            score *= 1.2

        # Reward substantial token count
        token_count = self.count_tokens(content)
        if token_count > 100:
            score *= 1.1

        return min(score, 1.0)

    def _estimate_tokens(self, text: str) -> int:
        """Estimate token count for text (for backward compatibility).

        Args:
            text: Text to estimate

        Returns:
            Estimated token count
        """
        return self.count_tokens(text)

    def _hash_content(self, content: str) -> str:
        """Generate hash for content deduplication.

        Args:
            content: Content to hash

        Returns:
            Content hash
        """
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    def _generate_id(self) -> str:
        """Generate unique ID.

        Returns:
            Unique ID string
        """
        return str(uuid.uuid4())[:8]

    def _save_entry(self, entry: ContextEntry):
        """Save entry to disk.

        Args:
            entry: Entry to save
        """
        entry_file = self.storage_path / f"{entry.id}.json"

        # Don't serialize compressed content in JSON
        entry_dict = asdict(entry)
        entry_dict["compressed"] = None

        entry_file.write_text(json.dumps(entry_dict, indent=2))

        # Save compressed separately
        compressed_file = self.storage_path / f"{entry.id}.zlib"
        compressed_file.write_bytes(entry.compressed)

    def _save_summary(self, summary: ContextSummary):
        """Save summary to disk.

        Args:
            summary: Summary to save
        """
        summary_file = self.storage_path / f"summary_{summary.id}.json"
        summary_file.write_text(json.dumps(asdict(summary), indent=2))

    def _load_context(self):
        """Load context from disk."""
        if not self.storage_path.exists():
            return

        # Load entries
        for entry_file in self.storage_path.glob("*.json"):
            if entry_file.name.startswith("summary_"):
                continue

            try:
                entry_dict = json.loads(entry_file.read_text())

                # Load compressed data
                compressed_file = self.storage_path / f"{entry_dict['id']}.zlib"
                if compressed_file.exists():
                    entry_dict["compressed"] = compressed_file.read_bytes()
                else:
                    entry_dict["compressed"] = b""

                entry = ContextEntry(**entry_dict)
                self.entries[entry.id] = entry
                self.dedup_index[entry.hash] = entry.id
            except Exception as e:
                logger.warning("Error loading entry %s: %s", entry_file, e)

    def _save_context(self):
        """Save all context entries to disk."""
        if not self.storage_path:
            return

        self.storage_path.mkdir(parents=True, exist_ok=True)

        for entry_id, entry in self.entries.items():
            entry_file = self.storage_path / f"{entry_id}.json"
            entry_dict = {
                "id": entry.id,
                "content": entry.content,
                "agent": entry.agent,
                "timestamp": entry.timestamp,
                "references": entry.references,
                "token_estimate": entry.token_estimate,
                "hash": entry.hash,
                "quality": entry.quality,
                "metadata": entry.metadata or {},
            }
            entry_file.write_text(json.dumps(entry_dict, indent=2))

            # Save compressed data separately
            if entry.compressed:
                compressed_file = self.storage_path / f"{entry_id}.zlib"
                compressed_file.write_bytes(entry.compressed)
            else:
                # Write empty bytes for None compressed
                compressed_file = self.storage_path / f"{entry_id}.zlib"
                compressed_file.write_bytes(b"")

        # Load summaries
        for summary_file in self.storage_path.glob("summary_*.json"):
            try:
                summary_dict = json.loads(summary_file.read_text())
                summary = ContextSummary(**summary_dict)
                self.summaries[summary.id] = summary
            except Exception as e:
                logger.warning("Error loading summary %s: %s", summary_file, e)
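For orientation, the sketch below exercises the SharedContextManager API defined in the file above. It is not part of the package diff: the storage path, agent name, topic tags, and content strings are illustrative assumptions, and it presumes the wheel is installed so that the uacs.context.shared_context module listed in the file table is importable.

from pathlib import Path

from uacs.context.shared_context import SharedContextManager

# Entries are zlib-compressed, deduplicated by SHA-256 hash, and persisted
# as JSON plus a .zlib blob under the storage path.
manager = SharedContextManager(storage_path=Path(".state/context"))

entry_id = manager.add_entry(
    "Implemented retry logic in the fetch helper and fixed the timeout bug.",
    agent="backend-agent",
    topics=["networking", "bugfix"],
)

# Re-adding identical content returns the existing entry ID instead of creating a duplicate.
assert manager.add_entry(
    "Implemented retry logic in the fetch helper and fixed the timeout bug.",
    agent="backend-agent",
) == entry_id

# Token-budgeted retrieval, ranked by a 70% quality / 30% recency weighting.
prompt_context = manager.get_compressed_context(max_tokens=2000, min_quality=0.7)

# Topic-focused retrieval boosts entries whose tags overlap the requested topics,
# then falls back to the remaining entries while the budget allows.
focused = manager.get_focused_context(topics=["networking"], max_tokens=2000)

# Manual summarization records the token savings and drops the summarized entries.
summary_id = manager.create_summary([entry_id], "Retry and timeout fixes in the fetch helper.")

print(manager.get_stats())  # entry_count, tokens_saved, compression_ratio, ...

Because add_entry deduplicates on a content hash and every retrieval call is capped by max_tokens, repeated hand-offs between agents should not grow the shared prompt context without bound.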