vector-memory-mcp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/memory_store.py ADDED
@@ -0,0 +1,512 @@
1
+ """
2
+ Memory Store Module
3
+ ===================
4
+
5
+ Provides SQLite-vec based vector storage and retrieval operations.
6
+ Handles database initialization, memory storage, search, and management.
7
+ """
8
+
9
+ import sqlite3
10
+ import sqlite_vec
11
+ import json
12
+ import os
13
+ from datetime import datetime, timedelta, timezone
14
+ from pathlib import Path
15
+ from typing import List, Optional, Dict, Any, Tuple
16
+
17
+ from .models import MemoryEntry, MemoryCategory, SearchResult, MemoryStats, Config
18
+ from .security import (
19
+ SecurityError, sanitize_input, validate_tags, validate_category,
20
+ validate_search_params, validate_cleanup_params, generate_content_hash,
21
+ check_resource_limits, validate_file_path
22
+ )
23
+ from .embeddings import get_embedding_model
24
+
25
+
26
class VectorMemoryStore:
    """
    Vector memory storage backed by SQLite and the sqlite-vec extension.

    Metadata is kept in the ``memory_metadata`` table and embeddings in the
    ``memory_vectors`` vec0 virtual table; the two are linked by
    ``memory_metadata.id == memory_vectors.rowid``.

    Every method opens its own short-lived connection and closes it before
    returning, so no SQLite connection object is shared between calls.
    """

    def __init__(self, db_path: Path, embedding_model_name: Optional[str] = None):
        """
        Initialize vector memory store.

        Args:
            db_path: Path to SQLite database file
            embedding_model_name: Name of embedding model to use; falls back
                to ``Config.EMBEDDING_MODEL`` when omitted or empty

        Raises:
            RuntimeError: If the database cannot be opened or initialized
        """
        self.db_path = Path(db_path)
        self.embedding_model_name = embedding_model_name or Config.EMBEDDING_MODEL

        # Validate database path before anything touches the filesystem
        validate_file_path(self.db_path)

        # Initialize database and embedding model
        self._init_database()
        self.embedding_model = get_embedding_model(self.embedding_model_name)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _get_connection(self) -> sqlite3.Connection:
        """
        Get SQLite connection with sqlite-vec loaded.

        The connection is closed again if loading the extension or switching
        the journal mode fails, so a failed setup never leaks a handle.
        """
        conn = sqlite3.connect(str(self.db_path))
        try:
            conn.enable_load_extension(True)
            sqlite_vec.load(conn)
            # Extension loading is only needed for sqlite-vec itself
            conn.enable_load_extension(False)
            # Enable WAL mode for safe concurrent access
            conn.execute("PRAGMA journal_mode=WAL")
        except Exception:
            conn.close()
            raise
        return conn

    def _open_connection(self, failure_prefix: str) -> sqlite3.Connection:
        """
        Open a connection, reporting failures under the caller's own message.

        Args:
            failure_prefix: Text placed before the underlying error, e.g.
                ``"Search failed"``

        Raises:
            RuntimeError: If the connection cannot be established
        """
        try:
            return self._get_connection()
        except Exception as e:
            raise RuntimeError(f"{failure_prefix}: {e}") from e

    @staticmethod
    def _preview(content: str, max_chars: int = 100) -> str:
        """Return content unchanged, or its first max_chars characters plus '...'."""
        if len(content) > max_chars:
            return content[:max_chars] + "..."
        return content

    def _init_database(self) -> None:
        """
        Initialize sqlite-vec database with required tables.

        Raises:
            RuntimeError: If the connection or any DDL statement fails
        """
        conn = self._open_connection("Failed to initialize database")

        try:
            # Create metadata table
            conn.execute("""
                CREATE TABLE IF NOT EXISTS memory_metadata (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    content_hash TEXT UNIQUE NOT NULL,
                    content TEXT NOT NULL,
                    category TEXT NOT NULL,
                    tags TEXT NOT NULL,  -- JSON array
                    created_at TEXT NOT NULL,
                    updated_at TEXT NOT NULL,
                    access_count INTEGER DEFAULT 0
                )
            """)

            # Create vector table using vec0
            conn.execute(f"""
                CREATE VIRTUAL TABLE IF NOT EXISTS memory_vectors USING vec0(
                    embedding float[{Config.EMBEDDING_DIM}]
                );
            """)

            # Create indexes for performance
            conn.execute("CREATE INDEX IF NOT EXISTS idx_category ON memory_metadata(category)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_created_at ON memory_metadata(created_at)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_hash ON memory_metadata(content_hash)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_access_count ON memory_metadata(access_count)")

            conn.commit()

        except Exception as e:
            conn.rollback()
            raise RuntimeError(f"Failed to initialize database: {e}") from e
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def store_memory(self, content: str, category: str, tags: List[str]) -> Dict[str, Any]:
        """
        Store a new memory with vector embedding.

        Args:
            content: Memory content
            category: Memory category
            tags: List of tags

        Returns:
            Dict with operation result and metadata. ``"success"`` is False
            when an entry with the same content hash already exists or when
            ``Config.MAX_TOTAL_MEMORIES`` has been reached.

        Raises:
            SecurityError: Propagated unchanged when raised during the call
            RuntimeError: On any other failure
        """
        # Input validation
        content = sanitize_input(content)
        category = validate_category(category)
        tags = validate_tags(tags)

        # The hash of the sanitized content is the duplicate-detection key
        content_hash = generate_content_hash(content)

        conn = self._open_connection("Failed to store memory")

        try:
            # Check if memory already exists
            existing = conn.execute(
                "SELECT id FROM memory_metadata WHERE content_hash = ?",
                (content_hash,)
            ).fetchone()

            if existing:
                return {
                    "success": False,
                    "message": "Memory already exists",
                    "memory_id": existing[0]
                }

            # Check memory limit
            count = conn.execute("SELECT COUNT(*) FROM memory_metadata").fetchone()[0]
            if count >= Config.MAX_TOTAL_MEMORIES:
                return {
                    "success": False,
                    "message": f"Memory limit reached ({count}). Use clear_old_memories to free space.",
                    "memory_id": None
                }

            # Generate embedding
            embedding = self.embedding_model.encode_single(content)

            # Store metadata
            now = datetime.now(timezone.utc).isoformat()
            cursor = conn.execute("""
                INSERT INTO memory_metadata (content_hash, content, category, tags, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (content_hash, content, category, json.dumps(tags), now, now))

            memory_id = cursor.lastrowid

            # Store vector using sqlite-vec serialization; the vector row
            # reuses the metadata id as its rowid so the tables can be joined
            embedding_blob = sqlite_vec.serialize_float32(embedding)
            conn.execute(
                "INSERT INTO memory_vectors (rowid, embedding) VALUES (?, ?)",
                (memory_id, embedding_blob)
            )

            conn.commit()

            return {
                "success": True,
                "memory_id": memory_id,
                "content_preview": self._preview(content),
                "category": category,
                "tags": tags,
                "created_at": now
            }

        except SecurityError:
            conn.rollback()
            raise
        except Exception as e:
            conn.rollback()
            raise RuntimeError(f"Failed to store memory: {e}") from e
        finally:
            conn.close()

    def search_memories(self, query: str, limit: int = 10, category: Optional[str] = None) -> List[SearchResult]:
        """
        Search memories using vector similarity.

        Every returned memory has its ``access_count`` incremented and its
        ``updated_at`` refreshed in the database.

        Args:
            query: Search query
            limit: Maximum number of results
            category: Optional category filter

        Returns:
            List of SearchResult objects ordered by ascending cosine distance

        Raises:
            SecurityError: Propagated unchanged when raised during the call
            RuntimeError: On any other failure
        """
        query, limit, category = validate_search_params(query, limit, category)

        conn = self._open_connection("Search failed")

        try:
            # Generate query embedding
            query_embedding = self.embedding_model.encode_single(query)
            query_blob = sqlite_vec.serialize_float32(query_embedding)

            # Build search query
            base_query = """
                SELECT
                    m.id, m.content, m.category, m.tags, m.created_at, m.updated_at, m.access_count, m.content_hash,
                    vec_distance_cosine(v.embedding, ?) as distance
                FROM memory_metadata m
                JOIN memory_vectors v ON m.id = v.rowid
            """

            params: List[Any] = [query_blob]

            if category:
                base_query += " WHERE m.category = ?"
                params.append(category)

            base_query += " ORDER BY distance LIMIT ?"
            params.append(limit)

            results = conn.execute(base_query, params).fetchall()

            # Update access counts for returned memories
            if results:
                # Bind ids as integers so the comparison against the INTEGER
                # primary key does not depend on text-to-number coercion
                memory_ids = [row[0] for row in results]
                placeholders = ",".join(["?"] * len(memory_ids))
                conn.execute(f"""
                    UPDATE memory_metadata
                    SET access_count = access_count + 1,
                        updated_at = ?
                    WHERE id IN ({placeholders})
                """, [datetime.now(timezone.utc).isoformat()] + memory_ids)
                conn.commit()

            # Format results
            search_results = []
            for row in results:
                memory = MemoryEntry.from_db_row(row[:-1])  # Exclude distance
                # Rows were read before the UPDATE, so include current access
                memory.access_count += 1

                distance = row[-1]
                similarity = 1 - distance  # Convert distance to similarity

                search_results.append(SearchResult(
                    memory=memory,
                    similarity=similarity,
                    distance=distance
                ))

            return search_results

        except SecurityError:
            raise
        except Exception as e:
            raise RuntimeError(f"Search failed: {e}") from e
        finally:
            conn.close()

    def get_recent_memories(self, limit: int = 10) -> List[MemoryEntry]:
        """
        Get recently stored memories.

        Args:
            limit: Maximum number of memories to return; clamped to the range
                1..``Config.MAX_MEMORIES_PER_SEARCH``

        Returns:
            List of MemoryEntry objects, newest first

        Raises:
            RuntimeError: If the query fails
        """
        limit = min(max(1, limit), Config.MAX_MEMORIES_PER_SEARCH)

        conn = self._open_connection("Failed to get recent memories")

        try:
            results = conn.execute("""
                SELECT id, content, category, tags, created_at, updated_at, access_count, content_hash
                FROM memory_metadata
                ORDER BY created_at DESC
                LIMIT ?
            """, (limit,)).fetchall()

            return [MemoryEntry.from_db_row(row) for row in results]

        except Exception as e:
            raise RuntimeError(f"Failed to get recent memories: {e}") from e
        finally:
            conn.close()

    def get_stats(self) -> MemoryStats:
        """
        Get database statistics.

        Returns:
            MemoryStats object with totals, per-category counts, the number of
            memories created in the last 7 days, database file size, the five
            most accessed memories and a health status string

        Raises:
            RuntimeError: If any query fails
        """
        conn = self._open_connection("Failed to get statistics")

        try:
            # Basic counts
            total_memories = conn.execute("SELECT COUNT(*) FROM memory_metadata").fetchone()[0]

            # Category breakdown
            categories = dict(conn.execute("""
                SELECT category, COUNT(*)
                FROM memory_metadata
                GROUP BY category
                ORDER BY COUNT(*) DESC
            """).fetchall())

            # Recent activity
            week_ago = (datetime.now(timezone.utc) - timedelta(days=7)).isoformat()
            recent_count = conn.execute(
                "SELECT COUNT(*) FROM memory_metadata WHERE created_at > ?",
                (week_ago,)
            ).fetchone()[0]

            # Database size
            db_size = os.path.getsize(self.db_path) if self.db_path.exists() else 0

            # Most accessed memories
            top_memories = conn.execute("""
                SELECT content, access_count
                FROM memory_metadata
                ORDER BY access_count DESC
                LIMIT 5
            """).fetchall()

            # Health status. A limit of 0 means nothing can be stored, so it
            # is reported as fully used instead of dividing by zero.
            memory_limit = Config.MAX_TOTAL_MEMORIES
            usage_pct = (total_memories / memory_limit) * 100 if memory_limit else 100.0
            if usage_pct < 70:
                health_status = "Healthy"
            elif usage_pct < 90:
                health_status = "Monitor - Consider cleanup"
            else:
                health_status = "Warning - Near limit"

            return MemoryStats(
                total_memories=total_memories,
                memory_limit=memory_limit,
                categories=categories,
                recent_week_count=recent_count,
                database_size_mb=round(db_size / 1024 / 1024, 2),
                embedding_model=self.embedding_model_name,
                embedding_dimensions=Config.EMBEDDING_DIM,
                top_accessed=[
                    {
                        "content_preview": self._preview(content),
                        "access_count": count
                    }
                    for content, count in top_memories
                ],
                health_status=health_status
            )

        except Exception as e:
            raise RuntimeError(f"Failed to get statistics: {e}") from e
        finally:
            conn.close()

    def clear_old_memories(self, days_old: int = 30, max_to_keep: int = 1000) -> Dict[str, Any]:
        """
        Clear old, less accessed memories.

        Only memories created more than ``days_old`` days ago are candidates.
        Candidates are removed least-accessed first, then oldest first, and
        only as many as needed to bring the total down to ``max_to_keep``.

        Args:
            days_old: Minimum age for cleanup candidates
            max_to_keep: Maximum total memories to keep

        Returns:
            Dict with cleanup results

        Raises:
            SecurityError: Propagated unchanged when raised during the call
            RuntimeError: On any other failure
        """
        days_old, max_to_keep = validate_cleanup_params(days_old, max_to_keep)

        cutoff_date = (datetime.now(timezone.utc) - timedelta(days=days_old)).isoformat()

        conn = self._open_connection("Failed to clear old memories")

        try:
            # Find candidates for deletion (old + low access)
            candidates = conn.execute("""
                SELECT id
                FROM memory_metadata
                WHERE created_at < ?
                ORDER BY access_count ASC, created_at ASC
            """, (cutoff_date,)).fetchall()

            total_count = conn.execute("SELECT COUNT(*) FROM memory_metadata").fetchone()[0]

            # Determine how many to delete
            to_delete_count = max(0, min(len(candidates), total_count - max_to_keep))

            if to_delete_count == 0:
                return {
                    "success": True,
                    "deleted_count": 0,
                    "message": "No memories need to be deleted"
                }

            # One bound integer id per statement: this stays within SQLite's
            # host-parameter limit no matter how many rows are removed, and
            # avoids comparing the integer keys against text values.
            delete_params = [(row[0],) for row in candidates[:to_delete_count]]

            # Delete from both tables
            conn.executemany("DELETE FROM memory_metadata WHERE id = ?", delete_params)
            conn.executemany("DELETE FROM memory_vectors WHERE rowid = ?", delete_params)

            conn.commit()

            return {
                "success": True,
                "deleted_count": to_delete_count,
                "remaining_count": total_count - to_delete_count,
                "message": f"Deleted {to_delete_count} old memories"
            }

        except SecurityError:
            conn.rollback()
            raise
        except Exception as e:
            conn.rollback()
            raise RuntimeError(f"Failed to clear old memories: {e}") from e
        finally:
            conn.close()

    def get_memory_by_id(self, memory_id: int) -> Optional[MemoryEntry]:
        """
        Get a specific memory by ID.

        Args:
            memory_id: Memory ID to retrieve

        Returns:
            MemoryEntry object or None if not found

        Raises:
            RuntimeError: If the lookup fails
        """
        conn = self._open_connection("Failed to get memory by ID")

        try:
            result = conn.execute("""
                SELECT id, content, category, tags, created_at, updated_at, access_count, content_hash
                FROM memory_metadata
                WHERE id = ?
            """, (memory_id,)).fetchone()

            if result:
                return MemoryEntry.from_db_row(result)
            return None

        except Exception as e:
            raise RuntimeError(f"Failed to get memory by ID: {e}") from e
        finally:
            conn.close()

    def delete_memory(self, memory_id: int) -> bool:
        """
        Delete a specific memory by ID.

        Args:
            memory_id: Memory ID to delete

        Returns:
            bool: True if deleted, False if not found

        Raises:
            RuntimeError: If the deletion fails
        """
        conn = self._open_connection("Failed to delete memory")

        try:
            # Check if memory exists
            exists = conn.execute(
                "SELECT 1 FROM memory_metadata WHERE id = ?",
                (memory_id,)
            ).fetchone()

            if not exists:
                return False

            # Delete from both tables
            conn.execute("DELETE FROM memory_metadata WHERE id = ?", (memory_id,))
            conn.execute("DELETE FROM memory_vectors WHERE rowid = ?", (memory_id,))

            conn.commit()
            return True

        except Exception as e:
            conn.rollback()
            raise RuntimeError(f"Failed to delete memory: {e}") from e
        finally:
            conn.close()