tribalmemory-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. tribalmemory/__init__.py +3 -0
  2. tribalmemory/a21/__init__.py +38 -0
  3. tribalmemory/a21/config/__init__.py +20 -0
  4. tribalmemory/a21/config/providers.py +104 -0
  5. tribalmemory/a21/config/system.py +184 -0
  6. tribalmemory/a21/container/__init__.py +8 -0
  7. tribalmemory/a21/container/container.py +212 -0
  8. tribalmemory/a21/providers/__init__.py +32 -0
  9. tribalmemory/a21/providers/base.py +241 -0
  10. tribalmemory/a21/providers/deduplication.py +99 -0
  11. tribalmemory/a21/providers/lancedb.py +232 -0
  12. tribalmemory/a21/providers/memory.py +128 -0
  13. tribalmemory/a21/providers/mock.py +54 -0
  14. tribalmemory/a21/providers/openai.py +151 -0
  15. tribalmemory/a21/providers/timestamp.py +88 -0
  16. tribalmemory/a21/system.py +293 -0
  17. tribalmemory/cli.py +298 -0
  18. tribalmemory/interfaces.py +306 -0
  19. tribalmemory/mcp/__init__.py +9 -0
  20. tribalmemory/mcp/__main__.py +6 -0
  21. tribalmemory/mcp/server.py +484 -0
  22. tribalmemory/performance/__init__.py +1 -0
  23. tribalmemory/performance/benchmarks.py +285 -0
  24. tribalmemory/performance/corpus_generator.py +171 -0
  25. tribalmemory/portability/__init__.py +1 -0
  26. tribalmemory/portability/embedding_metadata.py +320 -0
  27. tribalmemory/server/__init__.py +9 -0
  28. tribalmemory/server/__main__.py +6 -0
  29. tribalmemory/server/app.py +187 -0
  30. tribalmemory/server/config.py +115 -0
  31. tribalmemory/server/models.py +206 -0
  32. tribalmemory/server/routes.py +378 -0
  33. tribalmemory/services/__init__.py +15 -0
  34. tribalmemory/services/deduplication.py +115 -0
  35. tribalmemory/services/embeddings.py +273 -0
  36. tribalmemory/services/import_export.py +506 -0
  37. tribalmemory/services/memory.py +275 -0
  38. tribalmemory/services/vector_store.py +360 -0
  39. tribalmemory/testing/__init__.py +22 -0
  40. tribalmemory/testing/embedding_utils.py +110 -0
  41. tribalmemory/testing/fixtures.py +123 -0
  42. tribalmemory/testing/metrics.py +256 -0
  43. tribalmemory/testing/mocks.py +560 -0
  44. tribalmemory/testing/semantic_expansions.py +91 -0
  45. tribalmemory/utils.py +23 -0
  46. tribalmemory-0.1.0.dist-info/METADATA +275 -0
  47. tribalmemory-0.1.0.dist-info/RECORD +51 -0
  48. tribalmemory-0.1.0.dist-info/WHEEL +5 -0
  49. tribalmemory-0.1.0.dist-info/entry_points.txt +3 -0
  50. tribalmemory-0.1.0.dist-info/licenses/LICENSE +190 -0
  51. tribalmemory-0.1.0.dist-info/top_level.txt +1 -0
tribalmemory/server/models.py
@@ -0,0 +1,206 @@
+"""Pydantic models for HTTP API request/response."""
+
+from datetime import datetime
+from enum import Enum
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class SourceType(str, Enum):
+    """Memory source types."""
+    USER_EXPLICIT = "user_explicit"
+    AUTO_CAPTURE = "auto_capture"
+    CORRECTION = "correction"
+    CROSS_INSTANCE = "cross_instance"
+    LEGACY = "legacy"
+    UNKNOWN = "unknown"
+
+
+# =============================================================================
+# Request Models
+# =============================================================================
+
+class RememberRequest(BaseModel):
+    """Request to store a new memory."""
+    content: str = Field(..., description="The memory content to store")
+    source_type: SourceType = Field(
+        default=SourceType.AUTO_CAPTURE,
+        description="How this memory was captured"
+    )
+    context: Optional[str] = Field(
+        default=None,
+        description="Additional context about the capture"
+    )
+    tags: Optional[list[str]] = Field(
+        default=None,
+        description="Tags for categorization and filtering"
+    )
+    skip_dedup: bool = Field(
+        default=False,
+        description="If True, store even if similar memory exists"
+    )
+
+
+class RecallRequest(BaseModel):
+    """Request to recall memories."""
+    query: str = Field(..., description="Natural language search query")
+    limit: int = Field(default=5, ge=1, le=50, description="Maximum results")
+    min_relevance: float = Field(
+        default=0.3,
+        ge=0.0,
+        le=1.0,
+        description="Minimum similarity score"
+    )
+    tags: Optional[list[str]] = Field(
+        default=None,
+        description="Filter by tags"
+    )
+
+
+class CorrectRequest(BaseModel):
+    """Request to correct an existing memory."""
+    original_id: str = Field(..., description="ID of memory to correct")
+    corrected_content: str = Field(..., description="Corrected information")
+    context: Optional[str] = Field(
+        default=None,
+        description="Context about the correction"
+    )
+
+
+# =============================================================================
+# Response Models
+# =============================================================================
+
+class MemoryEntryResponse(BaseModel):
+    """A single memory entry."""
+    id: str
+    content: str
+    source_instance: str
+    source_type: SourceType
+    created_at: datetime
+    updated_at: datetime
+    tags: list[str]
+    context: Optional[str]
+    confidence: float
+    supersedes: Optional[str]
+
+    model_config = {"from_attributes": True}
+
+
+class StoreResponse(BaseModel):
+    """Response from storing a memory."""
+    success: bool
+    memory_id: Optional[str] = None
+    duplicate_of: Optional[str] = None
+    error: Optional[str] = None
+
+
+class RecallResultResponse(BaseModel):
+    """A single recall result with score."""
+    memory: MemoryEntryResponse
+    similarity_score: float
+    retrieval_time_ms: float
+
+
+class RecallResponse(BaseModel):
+    """Response from recalling memories."""
+    results: list[RecallResultResponse]
+    query: str
+    total_time_ms: float
+    error: Optional[str] = None
+
+
+class HealthResponse(BaseModel):
+    """Health check response."""
+    status: str = "ok"
+    instance_id: str
+    memory_count: int
+    version: str = "0.1.0"
+
+
+class StatsResponse(BaseModel):
+    """Memory statistics response."""
+    total_memories: int
+    by_source_type: dict[str, int]
+    by_tag: dict[str, int]
+    instance_id: str
+
+
+class ErrorResponse(BaseModel):
+    """Error response."""
+    error: str
+    detail: Optional[str] = None
+
+
+class ForgetResponse(BaseModel):
+    """Response from forgetting a memory."""
+    success: bool
+    memory_id: str
+
+
+class ShutdownResponse(BaseModel):
+    """Response from shutdown request."""
+    status: str
+
+
+# =============================================================================
+# Import/Export Models (Issue #7)
+# =============================================================================
+
+class ExportRequest(BaseModel):
+    """Request to export memories."""
+    tags: Optional[list[str]] = Field(
+        default=None,
+        description="Filter: only memories matching any of these tags",
+    )
+    date_from: Optional[str] = Field(
+        default=None,
+        description="Filter: ISO 8601 datetime lower bound (created_at)",
+    )
+    date_to: Optional[str] = Field(
+        default=None,
+        description="Filter: ISO 8601 datetime upper bound (created_at)",
+    )
+
+
+class ExportResponse(BaseModel):
+    """Response containing the exported bundle."""
+    success: bool
+    memory_count: int = 0
+    bundle: Optional[dict] = None
+    error: Optional[str] = None
+
+
+class ImportRequest(BaseModel):
+    """Request to import memories from a bundle."""
+    bundle: dict = Field(
+        ..., description="Portable bundle (manifest + entries)",
+    )
+    conflict_resolution: str = Field(
+        default="skip",
+        description="Conflict strategy: skip | overwrite | merge",
+    )
+    embedding_strategy: str = Field(
+        default="auto",
+        description="Embedding strategy: auto | keep | drop",
+    )
+    dry_run: bool = Field(
+        default=False,
+        description="Preview changes without writing",
+    )
+
+
+class ImportResponse(BaseModel):
+    """Response from import operation."""
+    success: bool
+    total: int = 0
+    imported: int = 0
+    skipped: int = 0
+    overwritten: int = 0
+    errors: int = 0
+    needs_reembedding: bool = False
+    dry_run: bool = False
+    duration_ms: float = 0.0
+    error_details: list[str] = Field(default_factory=list)
+    error: Optional[str] = None
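The request/response models above are plain pydantic classes, so a payload can be validated without starting the server. A minimal sketch, assuming pydantic v2 (which the model_config dict style suggests); the payload values are illustrative, not taken from the package:

from tribalmemory.server.models import RememberRequest, SourceType

# Hypothetical payload a client might POST to the /v1/remember route.
payload = {
    "content": "Deploys must go through the staging cluster first.",
    "source_type": "user_explicit",
    "tags": ["ops", "deploy"],
}

req = RememberRequest(**payload)          # pydantic validates and coerces types
assert req.source_type is SourceType.USER_EXPLICIT
assert req.skip_dedup is False            # defaults are filled in
print(req.model_dump_json(indent=2))      # round-trips back to JSON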
tribalmemory/server/routes.py
@@ -0,0 +1,378 @@
+"""API route handlers."""
+
+import time
+from typing import Optional
+
+from fastapi import APIRouter, HTTPException, Depends
+
+from ..interfaces import MemorySource, MemoryEntry
+from ..services import TribalMemoryService
+from .models import (
+    RememberRequest,
+    RecallRequest,
+    CorrectRequest,
+    StoreResponse,
+    RecallResponse,
+    RecallResultResponse,
+    MemoryEntryResponse,
+    HealthResponse,
+    StatsResponse,
+    ForgetResponse,
+    ShutdownResponse,
+    SourceType,
+    ExportRequest,
+    ExportResponse,
+    ImportRequest,
+    ImportResponse,
+)
+
+router = APIRouter(prefix="/v1", tags=["memory"])
+
+
+def get_memory_service() -> TribalMemoryService:
+    """Dependency injection for memory service.
+
+    This is set by the app during startup.
+    """
+    from .app import _memory_service
+    if _memory_service is None:
+        raise HTTPException(status_code=503, detail="Service not initialized")
+    return _memory_service
+
+
+def get_instance_id() -> str:
+    """Get the current instance ID."""
+    from .app import _instance_id
+    return _instance_id or "unknown"
+
+
+def _convert_source_type(source_type: SourceType) -> MemorySource:
+    """Convert API source type to internal enum."""
+    return MemorySource(source_type.value)
+
+
+def _entry_to_response(entry: MemoryEntry) -> MemoryEntryResponse:
+    """Convert internal MemoryEntry to API response."""
+    return MemoryEntryResponse(
+        id=entry.id,
+        content=entry.content,
+        source_instance=entry.source_instance,
+        source_type=SourceType(entry.source_type.value),
+        created_at=entry.created_at,
+        updated_at=entry.updated_at,
+        tags=entry.tags,
+        context=entry.context,
+        confidence=entry.confidence,
+        supersedes=entry.supersedes,
+    )
+
+
+@router.post("/remember", response_model=StoreResponse)
+async def remember(
+    request: RememberRequest,
+    service: TribalMemoryService = Depends(get_memory_service),
+) -> StoreResponse:
+    """Store a new memory."""
+    try:
+        result = await service.remember(
+            content=request.content,
+            source_type=_convert_source_type(request.source_type),
+            context=request.context,
+            tags=request.tags,
+            skip_dedup=request.skip_dedup,
+        )
+
+        return StoreResponse(
+            success=result.success,
+            memory_id=result.memory_id,
+            duplicate_of=result.duplicate_of,
+            error=result.error,
+        )
+    except Exception as e:
+        return StoreResponse(success=False, error=str(e))
+
+
+@router.post("/recall", response_model=RecallResponse)
+async def recall(
+    request: RecallRequest,
+    service: TribalMemoryService = Depends(get_memory_service),
+) -> RecallResponse:
+    """Recall relevant memories for a query."""
+    try:
+        start_time = time.time()
+
+        results = await service.recall(
+            query=request.query,
+            limit=request.limit,
+            min_relevance=request.min_relevance,
+            tags=request.tags,
+        )
+
+        total_time_ms = (time.time() - start_time) * 1000
+
+        return RecallResponse(
+            results=[
+                RecallResultResponse(
+                    memory=_entry_to_response(r.memory),
+                    similarity_score=r.similarity_score,
+                    retrieval_time_ms=r.retrieval_time_ms,
+                )
+                for r in results
+            ],
+            query=request.query,
+            total_time_ms=total_time_ms,
+        )
+    except Exception as e:
+        # Return empty results with error info for consistency with other endpoints
+        return RecallResponse(
+            results=[],
+            query=request.query,
+            total_time_ms=0.0,
+            error=str(e),
+        )
+
+
+@router.post("/correct", response_model=StoreResponse)
+async def correct(
+    request: CorrectRequest,
+    service: TribalMemoryService = Depends(get_memory_service),
+) -> StoreResponse:
+    """Correct an existing memory."""
+    try:
+        result = await service.correct(
+            original_id=request.original_id,
+            corrected_content=request.corrected_content,
+            context=request.context,
+        )
+
+        return StoreResponse(
+            success=result.success,
+            memory_id=result.memory_id,
+            error=result.error,
+        )
+    except Exception as e:
+        return StoreResponse(success=False, error=str(e))
+
+
+@router.delete("/forget/{memory_id}", response_model=ForgetResponse)
+async def forget(
+    memory_id: str,
+    service: TribalMemoryService = Depends(get_memory_service),
+) -> ForgetResponse:
+    """Forget (delete) a specific memory. GDPR-compliant."""
+    try:
+        success = await service.forget(memory_id)
+        return ForgetResponse(success=success, memory_id=memory_id)
+    except Exception as e:
+        return ForgetResponse(success=False, memory_id=memory_id)
+
+
+@router.get("/memory/{memory_id}", response_model=MemoryEntryResponse)
+async def get_memory(
+    memory_id: str,
+    service: TribalMemoryService = Depends(get_memory_service),
+) -> MemoryEntryResponse:
+    """Get a specific memory by ID."""
+    entry = await service.get(memory_id)
+    if entry is None:
+        raise HTTPException(status_code=404, detail=f"Memory {memory_id} not found")
+    return _entry_to_response(entry)
+
+
+@router.get("/health", response_model=HealthResponse)
+async def health(
+    service: TribalMemoryService = Depends(get_memory_service),
+    instance_id: str = Depends(get_instance_id),
+) -> HealthResponse:
+    """Health check endpoint."""
+    try:
+        stats = await service.get_stats()
+        return HealthResponse(
+            status="ok",
+            instance_id=instance_id,
+            memory_count=stats.get("total_memories", 0),
+        )
+    except Exception:
+        return HealthResponse(
+            status="degraded",
+            instance_id=instance_id,
+            memory_count=0,
+        )
+
+
+@router.get("/stats", response_model=StatsResponse)
+async def stats(
+    service: TribalMemoryService = Depends(get_memory_service),
+    instance_id: str = Depends(get_instance_id),
+) -> StatsResponse:
+    """Get memory statistics."""
+    stats_data = await service.get_stats()
+    return StatsResponse(
+        total_memories=stats_data.get("total_memories", 0),
+        by_source_type=stats_data.get("by_source_type", {}),
+        by_tag=stats_data.get("by_tag", {}),
+        instance_id=instance_id,
+    )
+
+
+@router.post("/export", response_model=ExportResponse)
+async def export_memories_route(
+    request: ExportRequest,
+    service: TribalMemoryService = Depends(get_memory_service),
+) -> ExportResponse:
+    """Export memories with optional tag/date filtering."""
+    from ..portability.embedding_metadata import (
+        create_embedding_metadata,
+    )
+    from ..services.import_export import (
+        ExportFilter,
+        export_memories,
+        parse_iso_datetime,
+    )
+
+    # Validate dates
+    parsed_from, err = parse_iso_datetime(
+        request.date_from, "date_from",
+    )
+    if err:
+        return ExportResponse(success=False, error=err)
+    parsed_to, err = parse_iso_datetime(
+        request.date_to, "date_to",
+    )
+    if err:
+        return ExportResponse(success=False, error=err)
+
+    try:
+        emb = service.embedding_service
+        meta = create_embedding_metadata(
+            model_name=getattr(emb, "model", "unknown"),
+            dimensions=getattr(emb, "dimensions", 1536),
+            provider="openai",
+        )
+
+        flt = None
+        if request.tags or parsed_from or parsed_to:
+            flt = ExportFilter(
+                tags=request.tags,
+                date_from=parsed_from,
+                date_to=parsed_to,
+            )
+
+        bundle = await export_memories(
+            store=service.vector_store,
+            embedding_metadata=meta,
+            filters=flt,
+        )
+
+        return ExportResponse(
+            success=True,
+            memory_count=bundle.manifest.memory_count,
+            bundle=bundle.to_dict(),
+        )
+    except Exception as e:
+        return ExportResponse(success=False, error=str(e))
+
+
+@router.post("/import", response_model=ImportResponse)
+async def import_memories_route(
+    request: ImportRequest,
+    service: TribalMemoryService = Depends(get_memory_service),
+) -> ImportResponse:
+    """Import memories from a portable bundle."""
+    from ..portability.embedding_metadata import (
+        PortableBundle,
+        ReembeddingStrategy,
+        create_embedding_metadata,
+    )
+    from ..services.import_export import (
+        ConflictResolution,
+        import_memories,
+        validate_conflict_resolution,
+        validate_embedding_strategy,
+    )
+
+    # Validate enum params
+    err = validate_conflict_resolution(
+        request.conflict_resolution,
+    )
+    if err:
+        return ImportResponse(success=False, error=err)
+    err = validate_embedding_strategy(
+        request.embedding_strategy,
+    )
+    if err:
+        return ImportResponse(success=False, error=err)
+
+    try:
+        bundle = PortableBundle.from_dict(request.bundle)
+    except Exception as e:
+        return ImportResponse(
+            success=False, error=f"Invalid bundle: {e}",
+        )
+
+    emb = service.embedding_service
+    target_meta = create_embedding_metadata(
+        model_name=getattr(emb, "model", "unknown"),
+        dimensions=getattr(emb, "dimensions", 1536),
+        provider="openai",
+    )
+
+    cr_map = {
+        "skip": ConflictResolution.SKIP,
+        "overwrite": ConflictResolution.OVERWRITE,
+        "merge": ConflictResolution.MERGE,
+    }
+    es_map = {
+        "auto": ReembeddingStrategy.AUTO,
+        "keep": ReembeddingStrategy.KEEP,
+        "drop": ReembeddingStrategy.DROP,
+    }
+
+    try:
+        summary = await import_memories(
+            bundle=bundle,
+            store=service.vector_store,
+            target_metadata=target_meta,
+            conflict_resolution=cr_map[
+                request.conflict_resolution
+            ],
+            embedding_strategy=es_map[
+                request.embedding_strategy
+            ],
+            dry_run=request.dry_run,
+        )
+
+        return ImportResponse(
+            success=True,
+            total=summary.total,
+            imported=summary.imported,
+            skipped=summary.skipped,
+            overwritten=summary.overwritten,
+            errors=summary.errors,
+            needs_reembedding=summary.needs_reembedding,
+            dry_run=summary.dry_run,
+            duration_ms=round(summary.duration_ms, 1),
+            error_details=summary.error_details,
+        )
+    except Exception as e:
+        return ImportResponse(success=False, error=str(e))
+
+
+@router.post("/shutdown", response_model=ShutdownResponse)
+async def shutdown() -> ShutdownResponse:
+    """Graceful shutdown endpoint.
+
+    Security note: This endpoint is localhost-only (bound to 127.0.0.1).
+    It allows local process management without authentication since only
+    processes on the same machine can reach it. For production deployments
+    with network exposure, use systemctl/signals instead of this endpoint.
+    """
+    import asyncio
+    import signal
+    import os
+
+    # Schedule shutdown after response is sent
+    asyncio.get_event_loop().call_later(
+        0.5, lambda: os.kill(os.getpid(), signal.SIGTERM)
+    )
+    return ShutdownResponse(status="shutting_down")
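Together these routes form a small JSON API under the /v1 prefix. A hedged client sketch using httpx; the host and port are assumptions (the server's bind config lives in app.py/config.py, not in this hunk), and the payload values are illustrative:

import asyncio
import httpx

async def main() -> None:
    # Assumes the tribalmemory server is already running locally.
    async with httpx.AsyncClient(base_url="http://127.0.0.1:8000") as client:
        stored = (await client.post("/v1/remember", json={
            "content": "Use the staging cluster before production deploys.",
            "tags": ["ops"],
        })).json()
        print("stored:", stored.get("memory_id"))

        recalled = (await client.post("/v1/recall", json={
            "query": "what is the deploy process?",
            "limit": 3,
        })).json()
        for r in recalled["results"]:
            print(round(r["similarity_score"], 3), r["memory"]["content"])

asyncio.run(main())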
tribalmemory/services/__init__.py
@@ -0,0 +1,15 @@
+"""Tribal Memory service implementations."""
+
+from .embeddings import OpenAIEmbeddingService
+from .vector_store import LanceDBVectorStore, InMemoryVectorStore
+from .memory import TribalMemoryService, create_memory_service
+from .deduplication import SemanticDeduplicationService
+
+__all__ = [
+    "OpenAIEmbeddingService",
+    "LanceDBVectorStore",
+    "InMemoryVectorStore",
+    "TribalMemoryService",
+    "create_memory_service",
+    "SemanticDeduplicationService",
+]
tribalmemory/services/deduplication.py
@@ -0,0 +1,115 @@
+"""Semantic Deduplication Service."""
+
+from typing import Optional
+
+from ..interfaces import IDeduplicationService, IVectorStore, IEmbeddingService
+
+
+class SemanticDeduplicationService(IDeduplicationService):
+    """Semantic deduplication using embedding similarity."""
+
+    def __init__(
+        self,
+        vector_store: IVectorStore,
+        embedding_service: IEmbeddingService,
+        exact_threshold: float = 0.98,
+        near_threshold: float = 0.90,
+    ):
+        self.vector_store = vector_store
+        self.embedding_service = embedding_service
+        self.exact_threshold = exact_threshold
+        self.near_threshold = near_threshold
+
+    async def is_duplicate(
+        self,
+        content: str,
+        embedding: list[float],
+        threshold: Optional[float] = None
+    ) -> tuple[bool, Optional[str]]:
+        """Check if content is a duplicate.
+
+        Returns:
+            Tuple of (is_duplicate, duplicate_of_id)
+        """
+        threshold = threshold or self.exact_threshold
+
+        results = await self.vector_store.recall(
+            embedding,
+            limit=1,
+            min_similarity=threshold
+        )
+
+        if results and results[0].similarity_score >= threshold:
+            return True, results[0].memory.id
+
+        return False, None
+
+    async def find_similar(
+        self,
+        content: str,
+        embedding: list[float],
+        threshold: Optional[float] = None,
+        limit: int = 10,
+    ) -> list[tuple[str, float]]:
+        """Find similar memories.
+
+        Returns:
+            List of (memory_id, similarity_score) tuples
+        """
+        threshold = threshold or self.near_threshold
+
+        results = await self.vector_store.recall(
+            embedding,
+            limit=limit,
+            min_similarity=threshold
+        )
+
+        return [(r.memory.id, r.similarity_score) for r in results]
+
+    async def get_duplicate_report(
+        self,
+        content: str,
+        embedding: list[float]
+    ) -> dict:
+        """Get detailed duplicate analysis report.
+
+        Args:
+            content: Text content to analyze.
+            embedding: Pre-computed embedding vector for the content.
+
+        Returns:
+            Dict with keys:
+            - is_duplicate (bool): True if above exact threshold
+            - is_near_duplicate (bool): True if above near threshold
+            - top_match (dict|None): Best matching memory with id, content preview, similarity
+            - candidates (list): Top 5 similar memories with id, similarity, content_preview
+        """
+        results = await self.vector_store.recall(
+            embedding,
+            limit=5,
+            min_similarity=0.7
+        )
+
+        if not results:
+            return {
+                "is_duplicate": False,
+                "is_near_duplicate": False,
+                "top_match": None,
+                "candidates": []
+            }
+
+        top = results[0]
+
+        return {
+            "is_duplicate": top.similarity_score >= self.exact_threshold,
+            "is_near_duplicate": top.similarity_score >= self.near_threshold,
+            "top_match": {
+                "id": top.memory.id,
+                "content": top.memory.content[:200] + "..." if len(top.memory.content) > 200 else top.memory.content,
+                "similarity": top.similarity_score,
+            },
+            "candidates": [
+                {"id": r.memory.id, "similarity": r.similarity_score, "content_preview": r.memory.content[:100]}
+                for r in results
+            ],
+        }
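The service applies two tiers: a hit at or above exact_threshold (default 0.98) counts as a duplicate, while a hit at or above near_threshold (default 0.90) is only flagged as a near-duplicate. A standalone sketch of that decision, with plain cosine similarity standing in for whatever metric the vector store's recall() actually uses; the vectors are toy values, not package data:

import math

def cosine(a: list[float], b: list[float]) -> float:
    # Cosine similarity between two dense vectors.
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    return dot / (norm_a * norm_b)

EXACT, NEAR = 0.98, 0.90              # defaults from SemanticDeduplicationService

stored = [0.12, 0.98, 0.05]           # embedding of an existing memory (toy)
incoming = [0.11, 0.99, 0.04]         # embedding of the new content (toy)

score = cosine(stored, incoming)
verdict = ("exact duplicate" if score >= EXACT
           else "near duplicate" if score >= NEAR
           else "distinct")
print(f"similarity={score:.3f} -> {verdict}")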