squidbot-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- squidbot/__init__.py +5 -0
- squidbot/agent.py +263 -0
- squidbot/channels.py +271 -0
- squidbot/character.py +83 -0
- squidbot/client.py +318 -0
- squidbot/config.py +148 -0
- squidbot/daemon.py +310 -0
- squidbot/lanes.py +41 -0
- squidbot/main.py +157 -0
- squidbot/memory_db.py +706 -0
- squidbot/playwright_check.py +233 -0
- squidbot/plugins/__init__.py +47 -0
- squidbot/plugins/base.py +96 -0
- squidbot/plugins/hooks.py +416 -0
- squidbot/plugins/loader.py +248 -0
- squidbot/plugins/web3_plugin.py +407 -0
- squidbot/scheduler.py +214 -0
- squidbot/server.py +487 -0
- squidbot/session.py +609 -0
- squidbot/skills.py +141 -0
- squidbot/skills_template/reminder/SKILL.md +13 -0
- squidbot/skills_template/search/SKILL.md +11 -0
- squidbot/skills_template/summarize/SKILL.md +14 -0
- squidbot/tools/__init__.py +100 -0
- squidbot/tools/base.py +42 -0
- squidbot/tools/browser.py +311 -0
- squidbot/tools/coding.py +599 -0
- squidbot/tools/cron.py +218 -0
- squidbot/tools/memory_tool.py +152 -0
- squidbot/tools/web_search.py +50 -0
- squidbot-0.1.0.dist-info/METADATA +542 -0
- squidbot-0.1.0.dist-info/RECORD +34 -0
- squidbot-0.1.0.dist-info/WHEEL +4 -0
- squidbot-0.1.0.dist-info/entry_points.txt +4 -0
squidbot/memory_db.py
ADDED
@@ -0,0 +1,706 @@
"""Persistent memory using SQLite with native vector search (sqlite-vec).

Supports large documents with chunking and fast KNN search via vec0 virtual table.
"""

import json
import logging
import sqlite3
import struct
from datetime import datetime
from pathlib import Path
from typing import Optional

import aiosqlite
from openai import AsyncOpenAI

from .config import DATA_DIR, OPENAI_API_KEY

logger = logging.getLogger(__name__)

# Database path
DB_PATH = DATA_DIR / "memory.db"

# Embedding model
EMBEDDING_MODEL = "text-embedding-3-small"
EMBEDDING_DIM = 1536

# Chunking settings for large documents
CHUNK_SIZE = 500  # tokens (approx chars / 4)
CHUNK_OVERLAP = 100

# OpenAI client for embeddings
_client: Optional[AsyncOpenAI] = None

# sqlite-vec availability flag
_vec_available: Optional[bool] = None


def get_client() -> AsyncOpenAI:
    global _client
    if _client is None:
        _client = AsyncOpenAI(api_key=OPENAI_API_KEY)
    return _client


def serialize_f32(vector: list[float]) -> bytes:
    """Serialize vector to bytes for sqlite-vec (float32 format)."""
    return struct.pack(f"{len(vector)}f", *vector)


def deserialize_f32(data: bytes) -> list[float]:
    """Deserialize bytes to vector."""
    n = len(data) // 4
    return list(struct.unpack(f"{n}f", data))


def _check_vec_available(conn: sqlite3.Connection) -> bool:
    """Check if sqlite-vec extension is available."""
    global _vec_available
    if _vec_available is not None:
        return _vec_available

    try:
        import sqlite_vec

        conn.enable_load_extension(True)
        sqlite_vec.load(conn)
        conn.enable_load_extension(False)
        _vec_available = True
        logger.info("sqlite-vec extension loaded successfully")
    except Exception as e:
        _vec_available = False
        logger.warning(f"sqlite-vec not available, using fallback: {e}")

    return _vec_available


def _load_vec_extension(conn: sqlite3.Connection) -> bool:
    """Load sqlite-vec extension into connection."""
    try:
        import sqlite_vec

        conn.enable_load_extension(True)
        sqlite_vec.load(conn)
        conn.enable_load_extension(False)
        return True
    except Exception:
        return False


async def get_embedding(text: str) -> list[float]:
    """Get embedding vector for text using OpenAI."""
    client = get_client()
    response = await client.embeddings.create(model=EMBEDDING_MODEL, input=text)
    return response.data[0].embedding


def chunk_text(
    text: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP
) -> list[dict]:
    """Split text into overlapping chunks for large documents.

    Returns list of dicts with 'text', 'start', 'end' positions.
    """
    # Approximate tokens as chars / 4
    char_chunk_size = chunk_size * 4
    char_overlap = overlap * 4

    if len(text) <= char_chunk_size:
        return [{"text": text, "start": 0, "end": len(text)}]

    chunks = []
    start = 0

    while start < len(text):
        end = min(start + char_chunk_size, len(text))

        # Try to break at sentence/paragraph boundary
        if end < len(text):
            # Look for good break points
            for sep in ["\n\n", "\n", ". ", "! ", "? ", ", "]:
                break_pos = text.rfind(sep, start + char_chunk_size // 2, end)
                if break_pos != -1:
                    end = break_pos + len(sep)
                    break

        chunks.append(
            {
                "text": text[start:end].strip(),
                "start": start,
                "end": end,
            }
        )

        # Move start with overlap
        start = end - char_overlap if end < len(text) else end

    return [c for c in chunks if c["text"]]  # Filter empty chunks


def init_db_sync(db_path: Path = DB_PATH) -> bool:
    """Initialize database synchronously. Returns True if vec0 is available."""
    db_path.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(db_path)

    # Enable WAL mode for better concurrency and performance
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA synchronous=NORMAL")  # Good balance of safety and speed
    conn.execute("PRAGMA cache_size=-64000")  # 64MB cache
    conn.execute("PRAGMA busy_timeout=5000")  # 5 second timeout

    vec_available = _load_vec_extension(conn)

    # Create memories table
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS memories (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            content TEXT NOT NULL,
            category TEXT,
            embedding BLOB,
            created_at TEXT NOT NULL,
            metadata TEXT
        )
    """
    )

    # Create chunks table for large documents
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS memory_chunks (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            memory_id INTEGER NOT NULL,
            chunk_index INTEGER NOT NULL,
            content TEXT NOT NULL,
            start_pos INTEGER NOT NULL,
            end_pos INTEGER NOT NULL,
            embedding BLOB,
            FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
        )
    """
    )

    # Create indices
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_memories_category ON memories(category)"
    )
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_chunks_memory_id ON memory_chunks(memory_id)"
    )

    # Create vec0 virtual table for fast KNN search if available
    if vec_available:
        try:
            conn.execute(
                f"""
                CREATE VIRTUAL TABLE IF NOT EXISTS memory_vec USING vec0(
                    memory_id INTEGER PRIMARY KEY,
                    embedding float[{EMBEDDING_DIM}]
                )
            """
            )
            conn.execute(
                f"""
                CREATE VIRTUAL TABLE IF NOT EXISTS chunk_vec USING vec0(
                    chunk_id INTEGER PRIMARY KEY,
                    embedding float[{EMBEDDING_DIM}]
                )
            """
            )
            logger.info("Created vec0 virtual tables for fast KNN search")
        except Exception as e:
            logger.warning(f"Failed to create vec0 tables: {e}")
            vec_available = False

    conn.commit()
    conn.close()

    return vec_available


async def init_db():
    """Initialize the database with tables."""
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)

    async with aiosqlite.connect(DB_PATH) as db:
        # Enable WAL mode for better concurrency and performance
        await db.execute("PRAGMA journal_mode=WAL")
        await db.execute("PRAGMA synchronous=NORMAL")
        await db.execute("PRAGMA cache_size=-64000")
        await db.execute("PRAGMA busy_timeout=5000")
        # Create memories table
        await db.execute(
            """
            CREATE TABLE IF NOT EXISTS memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                content TEXT NOT NULL,
                category TEXT,
                embedding BLOB,
                created_at TEXT NOT NULL,
                metadata TEXT
            )
        """
        )

        # Create chunks table for large documents
        await db.execute(
            """
            CREATE TABLE IF NOT EXISTS memory_chunks (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                memory_id INTEGER NOT NULL,
                chunk_index INTEGER NOT NULL,
                content TEXT NOT NULL,
                start_pos INTEGER NOT NULL,
                end_pos INTEGER NOT NULL,
                embedding BLOB,
                FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
            )
        """
        )

        # Create indices
        await db.execute(
            "CREATE INDEX IF NOT EXISTS idx_memories_category ON memories(category)"
        )
        await db.execute(
            "CREATE INDEX IF NOT EXISTS idx_chunks_memory_id ON memory_chunks(memory_id)"
        )

        await db.commit()

    # Initialize vec0 tables synchronously (sqlite-vec needs sync connection)
    init_db_sync(DB_PATH)


async def add_memory(
    content: str,
    category: Optional[str] = None,
    metadata: Optional[dict] = None,
    with_embedding: bool = True,
) -> dict:
    """Add a new memory entry with optional embedding."""
    await init_db()

    embedding_bytes = None
    embedding = None
    if with_embedding:
        try:
            embedding = await get_embedding(content)
            embedding_bytes = serialize_f32(embedding)
        except Exception as e:
            logger.warning(f"Failed to get embedding: {e}")

    async with aiosqlite.connect(DB_PATH) as db:
        cursor = await db.execute(
            """
            INSERT INTO memories (content, category, embedding, created_at, metadata)
            VALUES (?, ?, ?, ?, ?)
        """,
            (
                content,
                category,
                embedding_bytes,
                datetime.now().isoformat(),
                json.dumps(metadata) if metadata else None,
            ),
        )
        await db.commit()
        entry_id = cursor.lastrowid

    # Add to vec0 index if available
    if embedding_bytes:
        try:
            conn = sqlite3.connect(DB_PATH)
            if _load_vec_extension(conn):
                conn.execute(
                    "INSERT OR REPLACE INTO memory_vec(memory_id, embedding) VALUES (?, ?)",
                    (entry_id, embedding_bytes),
                )
                conn.commit()
            conn.close()
        except Exception as e:
            logger.debug(f"Failed to add to vec0 index: {e}")

    return {
        "id": entry_id,
        "content": content,
        "category": category,
        "created_at": datetime.now().isoformat(),
    }


async def add_document(
    content: str,
    category: Optional[str] = None,
    metadata: Optional[dict] = None,
) -> dict:
    """Add a large document with automatic chunking and embedding."""
    await init_db()

    # Store full document
    async with aiosqlite.connect(DB_PATH) as db:
        cursor = await db.execute(
            """
            INSERT INTO memories (content, category, embedding, created_at, metadata)
            VALUES (?, ?, NULL, ?, ?)
        """,
            (
                content,
                category,
                datetime.now().isoformat(),
                json.dumps({**(metadata or {}), "chunked": True}),
            ),
        )
        await db.commit()
        memory_id = cursor.lastrowid

    # Chunk and embed
    chunks = chunk_text(content)
    chunk_ids = []

    for idx, chunk in enumerate(chunks):
        try:
            embedding = await get_embedding(chunk["text"])
            embedding_bytes = serialize_f32(embedding)

            async with aiosqlite.connect(DB_PATH) as db:
                cursor = await db.execute(
                    """
                    INSERT INTO memory_chunks
                    (memory_id, chunk_index, content, start_pos, end_pos, embedding)
                    VALUES (?, ?, ?, ?, ?, ?)
                """,
                    (
                        memory_id,
                        idx,
                        chunk["text"],
                        chunk["start"],
                        chunk["end"],
                        embedding_bytes,
                    ),
                )
                await db.commit()
                chunk_id = cursor.lastrowid
                chunk_ids.append(chunk_id)

            # Add to vec0 index
            try:
                conn = sqlite3.connect(DB_PATH)
                if _load_vec_extension(conn):
                    conn.execute(
                        "INSERT OR REPLACE INTO chunk_vec(chunk_id, embedding) VALUES (?, ?)",
                        (chunk_id, embedding_bytes),
                    )
                    conn.commit()
                conn.close()
            except Exception:
                pass

        except Exception as e:
            logger.warning(f"Failed to embed chunk {idx}: {e}")

    return {
        "id": memory_id,
        "content": content[:100] + "..." if len(content) > 100 else content,
        "category": category,
        "chunks": len(chunks),
        "created_at": datetime.now().isoformat(),
    }


async def search_memory(query: str, limit: int = 10) -> list[dict]:
    """Search memories by text content (LIKE query)."""
    await init_db()

    async with aiosqlite.connect(DB_PATH) as db:
        db.row_factory = aiosqlite.Row
        cursor = await db.execute(
            """
            SELECT id, content, category, created_at, metadata
            FROM memories
            WHERE content LIKE ?
            ORDER BY created_at DESC
            LIMIT ?
        """,
            (f"%{query}%", limit),
        )
        rows = await cursor.fetchall()

        return [
            {
                "id": row["id"],
                "content": row["content"],
                "category": row["category"],
                "created_at": row["created_at"],
                "metadata": json.loads(row["metadata"]) if row["metadata"] else None,
            }
            for row in rows
        ]


async def search_memory_semantic(query: str, limit: int = 5) -> list[dict]:
    """Search memories using semantic similarity via sqlite-vec KNN."""
    await init_db()

    try:
        query_embedding = await get_embedding(query)
        query_bytes = serialize_f32(query_embedding)
    except Exception:
        return await search_memory(query, limit)

    # Try native vec0 KNN search first
    try:
        conn = sqlite3.connect(DB_PATH)
        if _load_vec_extension(conn):
            # Search in memory_vec
            cursor = conn.execute(
                """
                SELECT m.id, m.content, m.category, m.created_at, m.metadata,
                       v.distance
                FROM memory_vec v
                JOIN memories m ON m.id = v.memory_id
                WHERE v.embedding MATCH ?
                AND k = ?
                ORDER BY v.distance
            """,
                (query_bytes, limit * 2),
            )
            rows = cursor.fetchall()

            # Also search chunks
            cursor = conn.execute(
                """
                SELECT m.id, mc.content, m.category, m.created_at, m.metadata,
                       cv.distance, mc.chunk_index
                FROM chunk_vec cv
                JOIN memory_chunks mc ON mc.id = cv.chunk_id
                JOIN memories m ON m.id = mc.memory_id
                WHERE cv.embedding MATCH ?
                AND k = ?
                ORDER BY cv.distance
            """,
                (query_bytes, limit * 2),
            )
            chunk_rows = cursor.fetchall()
            conn.close()

            # Combine results (convert distance to similarity)
            results = []
            seen_ids = set()

            for row in rows:
                if row[0] not in seen_ids:
                    seen_ids.add(row[0])
                    # L2 distance to cosine-like similarity
                    similarity = max(0, 1 - row[5] / 2)
                    results.append(
                        {
                            "id": row[0],
                            "content": row[1],
                            "category": row[2],
                            "created_at": row[3],
                            "metadata": json.loads(row[4]) if row[4] else None,
                            "similarity": similarity,
                        }
                    )

            for row in chunk_rows:
                if row[0] not in seen_ids:
                    seen_ids.add(row[0])
                    similarity = max(0, 1 - row[5] / 2)
                    results.append(
                        {
                            "id": row[0],
                            "content": row[1],
                            "category": row[2],
                            "created_at": row[3],
                            "metadata": json.loads(row[4]) if row[4] else None,
                            "similarity": similarity,
                            "chunk_index": row[6],
                        }
                    )

            results.sort(key=lambda x: x["similarity"], reverse=True)
            return results[:limit]

    except Exception as e:
        logger.debug(f"vec0 search failed, using fallback: {e}")

    # Fallback: Python-based cosine similarity
    return await _search_memory_fallback(query_embedding, limit)


async def _search_memory_fallback(
    query_embedding: list[float], limit: int
) -> list[dict]:
    """Fallback semantic search using Python cosine similarity."""
    import numpy as np

    async with aiosqlite.connect(DB_PATH) as db:
        db.row_factory = aiosqlite.Row
        cursor = await db.execute(
            """
            SELECT id, content, category, created_at, metadata, embedding
            FROM memories
            WHERE embedding IS NOT NULL
        """
        )
        rows = await cursor.fetchall()

    if not rows:
        return []

    query_vec = np.array(query_embedding)
    query_norm = np.linalg.norm(query_vec)

    results = []
    for row in rows:
        if row["embedding"]:
            mem_vec = np.array(deserialize_f32(row["embedding"]))
            mem_norm = np.linalg.norm(mem_vec)

            if query_norm > 0 and mem_norm > 0:
                similarity = float(np.dot(query_vec, mem_vec) / (query_norm * mem_norm))
            else:
                similarity = 0

            results.append(
                {
                    "id": row["id"],
                    "content": row["content"],
                    "category": row["category"],
                    "created_at": row["created_at"],
                    "metadata": (
                        json.loads(row["metadata"]) if row["metadata"] else None
                    ),
                    "similarity": similarity,
                }
            )

    results.sort(key=lambda x: x["similarity"], reverse=True)
    return results[:limit]


async def load_all_memories(limit: int = 100) -> list[dict]:
    """Load all memories."""
    await init_db()

    async with aiosqlite.connect(DB_PATH) as db:
        db.row_factory = aiosqlite.Row
        cursor = await db.execute(
            """
            SELECT id, content, category, created_at, metadata
            FROM memories
            ORDER BY created_at DESC
            LIMIT ?
        """,
            (limit,),
        )
        rows = await cursor.fetchall()

        return [
            {
                "id": row["id"],
                "content": row["content"],
                "category": row["category"],
                "created_at": row["created_at"],
                "metadata": json.loads(row["metadata"]) if row["metadata"] else None,
            }
            for row in rows
        ]


async def delete_memory(memory_id: int) -> bool:
    """Delete a memory and its chunks by ID."""
    await init_db()

    async with aiosqlite.connect(DB_PATH) as db:
        # Delete chunks first
        await db.execute("DELETE FROM memory_chunks WHERE memory_id = ?", (memory_id,))
        cursor = await db.execute("DELETE FROM memories WHERE id = ?", (memory_id,))
        await db.commit()
        deleted = cursor.rowcount > 0

    # Remove from vec0 indices
    if deleted:
        try:
            conn = sqlite3.connect(DB_PATH)
            if _load_vec_extension(conn):
                conn.execute("DELETE FROM memory_vec WHERE memory_id = ?", (memory_id,))
                conn.execute(
                    "DELETE FROM chunk_vec WHERE chunk_id IN "
                    "(SELECT id FROM memory_chunks WHERE memory_id = ?)",
                    (memory_id,),
                )
                conn.commit()
            conn.close()
        except Exception:
            pass

    return deleted


async def get_memory_context(limit: int = 50) -> str:
    """Get memory as context string for system prompt."""
    memories = await load_all_memories(limit)

    if not memories:
        return ""

    lines = ["## Agent Memory"]
    for entry in memories:
        cat = f"[{entry['category']}] " if entry.get("category") else ""
        content = entry["content"]
        if len(content) > 200:
            content = content[:200] + "..."
        lines.append(f"- {cat}{content}")

    return "\n".join(lines)


async def get_memory_stats() -> dict:
    """Get memory database statistics."""
    await init_db()

    async with aiosqlite.connect(DB_PATH) as db:
        # Count memories
        cursor = await db.execute("SELECT COUNT(*) FROM memories")
        total_memories = (await cursor.fetchone())[0]

        # Count chunks
        cursor = await db.execute("SELECT COUNT(*) FROM memory_chunks")
        total_chunks = (await cursor.fetchone())[0]

        # Count with embeddings
        cursor = await db.execute(
            "SELECT COUNT(*) FROM memories WHERE embedding IS NOT NULL"
        )
        with_embeddings = (await cursor.fetchone())[0]

    # Check vec0 status
    vec_available = False
    vec_memory_count = 0
    vec_chunk_count = 0

    try:
        conn = sqlite3.connect(DB_PATH)
        if _load_vec_extension(conn):
            vec_available = True
            cursor = conn.execute("SELECT COUNT(*) FROM memory_vec")
            vec_memory_count = cursor.fetchone()[0]
            cursor = conn.execute("SELECT COUNT(*) FROM chunk_vec")
            vec_chunk_count = cursor.fetchone()[0]
        conn.close()
    except Exception:
        pass

    return {
        "total_memories": total_memories,
        "total_chunks": total_chunks,
        "with_embeddings": with_embeddings,
        "vec_available": vec_available,
        "vec_memory_count": vec_memory_count,
        "vec_chunk_count": vec_chunk_count,
    }
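For orientation, here is a minimal usage sketch of the module above; it is not part of the package contents. It assumes squidbot 0.1.0 is installed together with its openai, aiosqlite and (optionally) sqlite-vec dependencies, that squidbot's config resolves an OpenAI API key, and that a local notes.txt exists as sample input. If embeddings cannot be fetched, search_memory_semantic falls back to the LIKE-based search; if sqlite-vec is missing, it falls back to Python cosine similarity, as the code shows.

import asyncio

from squidbot import memory_db


async def main() -> None:
    # Short memory: one row in `memories`, embedded and mirrored into memory_vec.
    note = await memory_db.add_memory(
        "User prefers concise answers", category="preferences"
    )
    print("stored memory", note["id"])

    # Large document: split by chunk_text(), each chunk embedded into chunk_vec.
    doc = await memory_db.add_document(open("notes.txt").read(), category="docs")
    print("stored document in", doc["chunks"], "chunks")

    # Semantic KNN via vec0 when available, Python cosine fallback otherwise.
    for hit in await memory_db.search_memory_semantic("preferred answer style", limit=3):
        print(round(hit["similarity"], 3), hit["content"][:60])

    print(await memory_db.get_memory_stats())


if __name__ == "__main__":
    asyncio.run(main())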