tribalmemory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tribalmemory/__init__.py +3 -0
- tribalmemory/a21/__init__.py +38 -0
- tribalmemory/a21/config/__init__.py +20 -0
- tribalmemory/a21/config/providers.py +104 -0
- tribalmemory/a21/config/system.py +184 -0
- tribalmemory/a21/container/__init__.py +8 -0
- tribalmemory/a21/container/container.py +212 -0
- tribalmemory/a21/providers/__init__.py +32 -0
- tribalmemory/a21/providers/base.py +241 -0
- tribalmemory/a21/providers/deduplication.py +99 -0
- tribalmemory/a21/providers/lancedb.py +232 -0
- tribalmemory/a21/providers/memory.py +128 -0
- tribalmemory/a21/providers/mock.py +54 -0
- tribalmemory/a21/providers/openai.py +151 -0
- tribalmemory/a21/providers/timestamp.py +88 -0
- tribalmemory/a21/system.py +293 -0
- tribalmemory/cli.py +298 -0
- tribalmemory/interfaces.py +306 -0
- tribalmemory/mcp/__init__.py +9 -0
- tribalmemory/mcp/__main__.py +6 -0
- tribalmemory/mcp/server.py +484 -0
- tribalmemory/performance/__init__.py +1 -0
- tribalmemory/performance/benchmarks.py +285 -0
- tribalmemory/performance/corpus_generator.py +171 -0
- tribalmemory/portability/__init__.py +1 -0
- tribalmemory/portability/embedding_metadata.py +320 -0
- tribalmemory/server/__init__.py +9 -0
- tribalmemory/server/__main__.py +6 -0
- tribalmemory/server/app.py +187 -0
- tribalmemory/server/config.py +115 -0
- tribalmemory/server/models.py +206 -0
- tribalmemory/server/routes.py +378 -0
- tribalmemory/services/__init__.py +15 -0
- tribalmemory/services/deduplication.py +115 -0
- tribalmemory/services/embeddings.py +273 -0
- tribalmemory/services/import_export.py +506 -0
- tribalmemory/services/memory.py +275 -0
- tribalmemory/services/vector_store.py +360 -0
- tribalmemory/testing/__init__.py +22 -0
- tribalmemory/testing/embedding_utils.py +110 -0
- tribalmemory/testing/fixtures.py +123 -0
- tribalmemory/testing/metrics.py +256 -0
- tribalmemory/testing/mocks.py +560 -0
- tribalmemory/testing/semantic_expansions.py +91 -0
- tribalmemory/utils.py +23 -0
- tribalmemory-0.1.0.dist-info/METADATA +275 -0
- tribalmemory-0.1.0.dist-info/RECORD +51 -0
- tribalmemory-0.1.0.dist-info/WHEEL +5 -0
- tribalmemory-0.1.0.dist-info/entry_points.txt +3 -0
- tribalmemory-0.1.0.dist-info/licenses/LICENSE +190 -0
- tribalmemory-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Pydantic models for HTTP API request/response."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SourceType(str, Enum):
    """Memory source types.

    Subclasses ``str`` so members serialize directly as their string
    values in JSON payloads. Values must stay in sync with the internal
    ``MemorySource`` enum: the route layer converts with
    ``MemorySource(source_type.value)`` and back with
    ``SourceType(entry.source_type.value)``.
    """
    USER_EXPLICIT = "user_explicit"
    AUTO_CAPTURE = "auto_capture"
    CORRECTION = "correction"
    CROSS_INSTANCE = "cross_instance"
    LEGACY = "legacy"
    UNKNOWN = "unknown"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# =============================================================================
|
|
21
|
+
# Request Models
|
|
22
|
+
# =============================================================================
|
|
23
|
+
|
|
24
|
+
class RememberRequest(BaseModel):
    """Request to store a new memory.

    Body of ``POST /v1/remember``; mapped onto ``service.remember()``
    by the route handler.
    """
    content: str = Field(..., description="The memory content to store")
    source_type: SourceType = Field(
        default=SourceType.AUTO_CAPTURE,
        description="How this memory was captured"
    )
    context: Optional[str] = Field(
        default=None,
        description="Additional context about the capture"
    )
    tags: Optional[list[str]] = Field(
        default=None,
        description="Tags for categorization and filtering"
    )
    # Escape hatch for intentional duplicates (dedup check is bypassed).
    skip_dedup: bool = Field(
        default=False,
        description="If True, store even if similar memory exists"
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class RecallRequest(BaseModel):
    """Request to recall memories.

    Body of ``POST /v1/recall``. ``limit`` and ``min_relevance`` are
    range-validated by pydantic (ge/le constraints below).
    """
    query: str = Field(..., description="Natural language search query")
    limit: int = Field(default=5, ge=1, le=50, description="Maximum results")
    min_relevance: float = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="Minimum similarity score"
    )
    tags: Optional[list[str]] = Field(
        default=None,
        description="Filter by tags"
    )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class CorrectRequest(BaseModel):
    """Request to correct an existing memory.

    Body of ``POST /v1/correct``; forwarded to ``service.correct()``.
    """
    original_id: str = Field(..., description="ID of memory to correct")
    corrected_content: str = Field(..., description="Corrected information")
    context: Optional[str] = Field(
        default=None,
        description="Context about the correction"
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# =============================================================================
|
|
72
|
+
# Response Models
|
|
73
|
+
# =============================================================================
|
|
74
|
+
|
|
75
|
+
class MemoryEntryResponse(BaseModel):
    """A single memory entry as returned by the API.

    Field-for-field mirror of the internal ``MemoryEntry`` (see
    ``_entry_to_response`` in the routes module).
    """
    id: str
    content: str
    # ID of the server instance that originally captured this memory.
    source_instance: str
    source_type: SourceType
    created_at: datetime
    updated_at: datetime
    tags: list[str]
    context: Optional[str]
    confidence: float
    # ID of the memory this entry replaces (corrections), if any.
    supersedes: Optional[str]

    # Pydantic v2: allow construction from attribute-bearing objects
    # (e.g. MemoryEntry instances) rather than only dicts.
    model_config = {"from_attributes": True}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class StoreResponse(BaseModel):
    """Response from storing a memory.

    Errors are reported in-band via ``success=False`` plus ``error``;
    ``duplicate_of`` is set when deduplication suppressed the store.
    """
    success: bool
    memory_id: Optional[str] = None
    duplicate_of: Optional[str] = None
    error: Optional[str] = None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class RecallResultResponse(BaseModel):
    """A single recall result: the memory plus its match score and timing."""
    memory: MemoryEntryResponse
    # Similarity of this memory to the query (higher is closer).
    similarity_score: float
    retrieval_time_ms: float
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class RecallResponse(BaseModel):
    """Response from recalling memories.

    On failure the route returns an empty ``results`` list with
    ``error`` populated instead of an HTTP error.
    """
    results: list[RecallResultResponse]
    # Echo of the query, so clients can correlate async responses.
    query: str
    total_time_ms: float
    error: Optional[str] = None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class HealthResponse(BaseModel):
    """Health check response.

    ``status`` is "ok" or "degraded" (set by the /health route).
    """
    status: str = "ok"
    instance_id: str
    memory_count: int
    # NOTE(review): hard-coded; presumably should track the package
    # version — confirm against the distribution metadata.
    version: str = "0.1.0"
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class StatsResponse(BaseModel):
    """Memory statistics response.

    Count breakdowns are keyed by source-type value and by tag.
    """
    total_memories: int
    by_source_type: dict[str, int]
    by_tag: dict[str, int]
    instance_id: str
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class ErrorResponse(BaseModel):
    """Generic error response: short message plus optional detail."""
    error: str
    detail: Optional[str] = None
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class ForgetResponse(BaseModel):
    """Response from forgetting (deleting) a memory.

    ``memory_id`` echoes the requested ID even when deletion failed.
    """
    success: bool
    memory_id: str
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class ShutdownResponse(BaseModel):
    """Response from a shutdown request (status is "shutting_down")."""
    status: str
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# =============================================================================
|
|
148
|
+
# Import/Export Models (Issue #7)
|
|
149
|
+
# =============================================================================
|
|
150
|
+
|
|
151
|
+
class ExportRequest(BaseModel):
    """Request to export memories.

    All filters are optional; with none set the whole store is exported.
    Date strings are validated by ``parse_iso_datetime`` in the route.
    """
    tags: Optional[list[str]] = Field(
        default=None,
        description="Filter: only memories matching any of these tags",
    )
    date_from: Optional[str] = Field(
        default=None,
        description="Filter: ISO 8601 datetime lower bound (created_at)",
    )
    date_to: Optional[str] = Field(
        default=None,
        description="Filter: ISO 8601 datetime upper bound (created_at)",
    )
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class ExportResponse(BaseModel):
    """Response containing the exported bundle.

    ``bundle`` is the portable manifest + entries dict; ``None`` on failure.
    """
    success: bool
    memory_count: int = 0
    bundle: Optional[dict] = None
    error: Optional[str] = None
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class ImportRequest(BaseModel):
    """Request to import memories from a bundle.

    The string enums here are validated in the route handler
    (``validate_conflict_resolution`` / ``validate_embedding_strategy``)
    rather than by pydantic, so invalid values yield an in-band error
    instead of a 422.
    """
    bundle: dict = Field(
        ..., description="Portable bundle (manifest + entries)",
    )
    conflict_resolution: str = Field(
        default="skip",
        description="Conflict strategy: skip | overwrite | merge",
    )
    embedding_strategy: str = Field(
        default="auto",
        description="Embedding strategy: auto | keep | drop",
    )
    dry_run: bool = Field(
        default=False,
        description="Preview changes without writing",
    )
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class ImportResponse(BaseModel):
    """Response from import operation.

    Counters mirror the import summary; ``error_details`` holds
    per-entry failure messages while ``error`` is a whole-request failure.
    """
    success: bool
    total: int = 0
    imported: int = 0
    skipped: int = 0
    overwritten: int = 0
    errors: int = 0
    # True when imported embeddings don't match the target model and
    # must be regenerated.
    needs_reembedding: bool = False
    dry_run: bool = False
    duration_ms: float = 0.0
    error_details: list[str] = Field(default_factory=list)
    error: Optional[str] = None
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
"""API route handlers."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, HTTPException, Depends
|
|
7
|
+
|
|
8
|
+
from ..interfaces import MemorySource, MemoryEntry
|
|
9
|
+
from ..services import TribalMemoryService
|
|
10
|
+
from .models import (
|
|
11
|
+
RememberRequest,
|
|
12
|
+
RecallRequest,
|
|
13
|
+
CorrectRequest,
|
|
14
|
+
StoreResponse,
|
|
15
|
+
RecallResponse,
|
|
16
|
+
RecallResultResponse,
|
|
17
|
+
MemoryEntryResponse,
|
|
18
|
+
HealthResponse,
|
|
19
|
+
StatsResponse,
|
|
20
|
+
ForgetResponse,
|
|
21
|
+
ShutdownResponse,
|
|
22
|
+
SourceType,
|
|
23
|
+
ExportRequest,
|
|
24
|
+
ExportResponse,
|
|
25
|
+
ImportRequest,
|
|
26
|
+
ImportResponse,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
router = APIRouter(prefix="/v1", tags=["memory"])
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_memory_service() -> TribalMemoryService:
    """Dependency injection for memory service.

    This is set by the app during startup.

    Raises:
        HTTPException: 503 when the app has not yet populated the
            module-level ``_memory_service`` global.
    """
    # Import at call time so we read the global's value after startup,
    # not its value when this module was first imported.
    from .app import _memory_service
    if _memory_service is None:
        raise HTTPException(status_code=503, detail="Service not initialized")
    return _memory_service
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_instance_id() -> str:
    """Return this server's instance ID, or "unknown" before startup."""
    # Read the app-module global lazily so we see the post-startup value.
    from .app import _instance_id

    if _instance_id:
        return _instance_id
    return "unknown"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _convert_source_type(source_type: SourceType) -> MemorySource:
    """Map an API-level SourceType onto the internal MemorySource enum.

    The two enums share their string values, so a value-based lookup
    is sufficient.
    """
    value = source_type.value
    return MemorySource(value)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _entry_to_response(entry: MemoryEntry) -> MemoryEntryResponse:
    """Build the API representation of an internal MemoryEntry."""
    fields = {
        "id": entry.id,
        "content": entry.content,
        "source_instance": entry.source_instance,
        # Re-wrap the internal enum as the API enum via shared values.
        "source_type": SourceType(entry.source_type.value),
        "created_at": entry.created_at,
        "updated_at": entry.updated_at,
        "tags": entry.tags,
        "context": entry.context,
        "confidence": entry.confidence,
        "supersedes": entry.supersedes,
    }
    return MemoryEntryResponse(**fields)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@router.post("/remember", response_model=StoreResponse)
async def remember(
    request: RememberRequest,
    service: TribalMemoryService = Depends(get_memory_service),
) -> StoreResponse:
    """Store a new memory.

    Failures are reported in-band (success=False plus error text)
    rather than as HTTP errors.
    """
    try:
        outcome = await service.remember(
            content=request.content,
            source_type=_convert_source_type(request.source_type),
            context=request.context,
            tags=request.tags,
            skip_dedup=request.skip_dedup,
        )
        response = StoreResponse(
            success=outcome.success,
            memory_id=outcome.memory_id,
            duplicate_of=outcome.duplicate_of,
            error=outcome.error,
        )
    except Exception as exc:  # route boundary: surface, don't raise
        response = StoreResponse(success=False, error=str(exc))
    return response
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@router.post("/recall", response_model=RecallResponse)
async def recall(
    request: RecallRequest,
    service: TribalMemoryService = Depends(get_memory_service),
) -> RecallResponse:
    """Recall relevant memories for a query.

    On any failure an empty result set is returned with the error
    message in-band, matching the other endpoints.
    """
    try:
        started = time.time()

        matches = await service.recall(
            query=request.query,
            limit=request.limit,
            min_relevance=request.min_relevance,
            tags=request.tags,
        )

        elapsed_ms = (time.time() - started) * 1000

        payload = []
        for match in matches:
            payload.append(
                RecallResultResponse(
                    memory=_entry_to_response(match.memory),
                    similarity_score=match.similarity_score,
                    retrieval_time_ms=match.retrieval_time_ms,
                )
            )

        return RecallResponse(
            results=payload,
            query=request.query,
            total_time_ms=elapsed_ms,
        )
    except Exception as exc:
        # Return empty results with error info for consistency with other endpoints
        return RecallResponse(
            results=[],
            query=request.query,
            total_time_ms=0.0,
            error=str(exc),
        )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@router.post("/correct", response_model=StoreResponse)
async def correct(
    request: CorrectRequest,
    service: TribalMemoryService = Depends(get_memory_service),
) -> StoreResponse:
    """Correct an existing memory, reporting failures in-band."""
    try:
        outcome = await service.correct(
            original_id=request.original_id,
            corrected_content=request.corrected_content,
            context=request.context,
        )
        response = StoreResponse(
            success=outcome.success,
            memory_id=outcome.memory_id,
            error=outcome.error,
        )
    except Exception as exc:  # route boundary: surface, don't raise
        response = StoreResponse(success=False, error=str(exc))
    return response
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@router.delete("/forget/{memory_id}", response_model=ForgetResponse)
async def forget(
    memory_id: str,
    service: TribalMemoryService = Depends(get_memory_service),
) -> ForgetResponse:
    """Forget (delete) a specific memory. GDPR-compliant.

    Deletion failures are reported in-band as ``success=False``,
    consistent with the other write endpoints.
    """
    try:
        success = await service.forget(memory_id)
        return ForgetResponse(success=success, memory_id=memory_id)
    except Exception:
        # Deliberate best-effort: any failure becomes success=False.
        # (No exception binding — the caught value was never used.)
        return ForgetResponse(success=False, memory_id=memory_id)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@router.get("/memory/{memory_id}", response_model=MemoryEntryResponse)
async def get_memory(
    memory_id: str,
    service: TribalMemoryService = Depends(get_memory_service),
) -> MemoryEntryResponse:
    """Fetch a single memory by ID; 404 if it does not exist."""
    found = await service.get(memory_id)
    if found is not None:
        return _entry_to_response(found)
    raise HTTPException(status_code=404, detail=f"Memory {memory_id} not found")
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@router.get("/health", response_model=HealthResponse)
async def health(
    service: TribalMemoryService = Depends(get_memory_service),
    instance_id: str = Depends(get_instance_id),
) -> HealthResponse:
    """Health check endpoint.

    Reports "degraded" (never an HTTP error) when the stats lookup
    fails: the process is alive even if the store is not.
    """
    try:
        snapshot = await service.get_stats()
        count = snapshot.get("total_memories", 0)
        return HealthResponse(
            status="ok",
            instance_id=instance_id,
            memory_count=count,
        )
    except Exception:
        return HealthResponse(
            status="degraded",
            instance_id=instance_id,
            memory_count=0,
        )
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
@router.get("/stats", response_model=StatsResponse)
async def stats(
    service: TribalMemoryService = Depends(get_memory_service),
    instance_id: str = Depends(get_instance_id),
) -> StatsResponse:
    """Get memory statistics (totals plus per-source-type/per-tag counts)."""
    data = await service.get_stats()
    # Missing keys default to empty counts so a sparse stats dict is fine.
    return StatsResponse(
        total_memories=data.get("total_memories", 0),
        by_source_type=data.get("by_source_type", {}),
        by_tag=data.get("by_tag", {}),
        instance_id=instance_id,
    )
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@router.post("/export", response_model=ExportResponse)
async def export_memories_route(
    request: ExportRequest,
    service: TribalMemoryService = Depends(get_memory_service),
) -> ExportResponse:
    """Export memories with optional tag/date filtering.

    Validation errors and export failures are returned in-band via
    ``success=False`` plus ``error``; this endpoint never raises.
    """
    # Imported locally rather than at module top — presumably to defer
    # optional/heavy modules or avoid an import cycle; confirm.
    from ..portability.embedding_metadata import (
        create_embedding_metadata,
    )
    from ..services.import_export import (
        ExportFilter,
        export_memories,
        parse_iso_datetime,
    )

    # Validate dates
    parsed_from, err = parse_iso_datetime(
        request.date_from, "date_from",
    )
    if err:
        return ExportResponse(success=False, error=err)
    parsed_to, err = parse_iso_datetime(
        request.date_to, "date_to",
    )
    if err:
        return ExportResponse(success=False, error=err)

    try:
        # Record which embedding model produced the stored vectors so
        # the importer can decide whether re-embedding is needed.
        emb = service.embedding_service
        meta = create_embedding_metadata(
            model_name=getattr(emb, "model", "unknown"),
            dimensions=getattr(emb, "dimensions", 1536),
            # NOTE(review): provider is hard-coded; verify if non-OpenAI
            # embedding services are supported here.
            provider="openai",
        )

        # Only build a filter when at least one criterion was supplied.
        flt = None
        if request.tags or parsed_from or parsed_to:
            flt = ExportFilter(
                tags=request.tags,
                date_from=parsed_from,
                date_to=parsed_to,
            )

        bundle = await export_memories(
            store=service.vector_store,
            embedding_metadata=meta,
            filters=flt,
        )

        return ExportResponse(
            success=True,
            memory_count=bundle.manifest.memory_count,
            bundle=bundle.to_dict(),
        )
    except Exception as e:
        return ExportResponse(success=False, error=str(e))
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
@router.post("/import", response_model=ImportResponse)
async def import_memories_route(
    request: ImportRequest,
    service: TribalMemoryService = Depends(get_memory_service),
) -> ImportResponse:
    """Import memories from a portable bundle.

    Pipeline: validate the string enums, parse the bundle, build the
    target embedding metadata, then run the import. Every failure is
    reported in-band via ``success=False`` plus ``error``.
    """
    # Imported locally rather than at module top — presumably to defer
    # optional/heavy modules or avoid an import cycle; confirm.
    from ..portability.embedding_metadata import (
        PortableBundle,
        ReembeddingStrategy,
        create_embedding_metadata,
    )
    from ..services.import_export import (
        ConflictResolution,
        import_memories,
        validate_conflict_resolution,
        validate_embedding_strategy,
    )

    # Validate enum params
    err = validate_conflict_resolution(
        request.conflict_resolution,
    )
    if err:
        return ImportResponse(success=False, error=err)
    err = validate_embedding_strategy(
        request.embedding_strategy,
    )
    if err:
        return ImportResponse(success=False, error=err)

    try:
        bundle = PortableBundle.from_dict(request.bundle)
    except Exception as e:
        return ImportResponse(
            success=False, error=f"Invalid bundle: {e}",
        )

    # Describe the embedding model this server currently uses, so the
    # importer can detect bundles embedded with a different model.
    emb = service.embedding_service
    target_meta = create_embedding_metadata(
        model_name=getattr(emb, "model", "unknown"),
        dimensions=getattr(emb, "dimensions", 1536),
        # NOTE(review): provider is hard-coded; verify if non-OpenAI
        # embedding services are supported here.
        provider="openai",
    )

    # String values were validated above, so these lookups cannot KeyError.
    cr_map = {
        "skip": ConflictResolution.SKIP,
        "overwrite": ConflictResolution.OVERWRITE,
        "merge": ConflictResolution.MERGE,
    }
    es_map = {
        "auto": ReembeddingStrategy.AUTO,
        "keep": ReembeddingStrategy.KEEP,
        "drop": ReembeddingStrategy.DROP,
    }

    try:
        summary = await import_memories(
            bundle=bundle,
            store=service.vector_store,
            target_metadata=target_meta,
            conflict_resolution=cr_map[
                request.conflict_resolution
            ],
            embedding_strategy=es_map[
                request.embedding_strategy
            ],
            dry_run=request.dry_run,
        )

        return ImportResponse(
            success=True,
            total=summary.total,
            imported=summary.imported,
            skipped=summary.skipped,
            overwritten=summary.overwritten,
            errors=summary.errors,
            needs_reembedding=summary.needs_reembedding,
            dry_run=summary.dry_run,
            duration_ms=round(summary.duration_ms, 1),
            error_details=summary.error_details,
        )
    except Exception as e:
        return ImportResponse(success=False, error=str(e))
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
@router.post("/shutdown", response_model=ShutdownResponse)
async def shutdown() -> ShutdownResponse:
    """Graceful shutdown endpoint.

    Security note: This endpoint is localhost-only (bound to 127.0.0.1).
    It allows local process management without authentication since only
    processes on the same machine can reach it. For production deployments
    with network exposure, use systemctl/signals instead of this endpoint.
    """
    import asyncio
    import os
    import signal

    # Schedule shutdown after the response is sent. Inside a coroutine,
    # get_running_loop() is the supported accessor; get_event_loop() is
    # deprecated here since Python 3.10 and may warn or misbehave.
    asyncio.get_running_loop().call_later(
        0.5, lambda: os.kill(os.getpid(), signal.SIGTERM)
    )
    return ShutdownResponse(status="shutting_down")
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Tribal Memory service implementations."""
|
|
2
|
+
|
|
3
|
+
from .embeddings import OpenAIEmbeddingService
|
|
4
|
+
from .vector_store import LanceDBVectorStore, InMemoryVectorStore
|
|
5
|
+
from .memory import TribalMemoryService, create_memory_service
|
|
6
|
+
from .deduplication import SemanticDeduplicationService
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"OpenAIEmbeddingService",
|
|
10
|
+
"LanceDBVectorStore",
|
|
11
|
+
"InMemoryVectorStore",
|
|
12
|
+
"TribalMemoryService",
|
|
13
|
+
"create_memory_service",
|
|
14
|
+
"SemanticDeduplicationService",
|
|
15
|
+
]
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Semantic Deduplication Service."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from ..interfaces import IDeduplicationService, IVectorStore, IEmbeddingService
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SemanticDeduplicationService(IDeduplicationService):
    """Semantic deduplication using embedding similarity.

    Two cutoffs: ``exact_threshold`` marks a true duplicate and
    ``near_threshold`` marks a near-duplicate worth reporting.
    """

    def __init__(
        self,
        vector_store: IVectorStore,
        embedding_service: IEmbeddingService,
        exact_threshold: float = 0.98,
        near_threshold: float = 0.90,
    ):
        self.vector_store = vector_store
        self.embedding_service = embedding_service
        self.exact_threshold = exact_threshold
        self.near_threshold = near_threshold

    async def is_duplicate(
        self,
        content: str,
        embedding: list[float],
        threshold: Optional[float] = None
    ) -> tuple[bool, Optional[str]]:
        """Check if content is a duplicate.

        Args:
            content: Text being checked (not used in the lookup itself;
                kept for interface parity).
            embedding: Pre-computed embedding vector for the content.
            threshold: Similarity cutoff; defaults to exact_threshold.

        Returns:
            Tuple of (is_duplicate, duplicate_of_id)
        """
        # Explicit None check (not `or`) so a caller-supplied 0.0 is honored.
        if threshold is None:
            threshold = self.exact_threshold

        results = await self.vector_store.recall(
            embedding,
            limit=1,
            min_similarity=threshold
        )

        if results and results[0].similarity_score >= threshold:
            return True, results[0].memory.id

        return False, None

    async def find_similar(
        self,
        content: str,
        embedding: list[float],
        threshold: Optional[float] = None,
        limit: int = 10,
    ) -> list[tuple[str, float]]:
        """Find similar memories.

        Args:
            content: Text being checked (not used in the lookup itself).
            embedding: Pre-computed embedding vector for the content.
            threshold: Similarity cutoff; defaults to near_threshold.
            limit: Maximum number of matches to return.

        Returns:
            List of (memory_id, similarity_score) tuples
        """
        # Explicit None check (not `or`) so a caller-supplied 0.0 is honored.
        if threshold is None:
            threshold = self.near_threshold

        results = await self.vector_store.recall(
            embedding,
            limit=limit,
            min_similarity=threshold
        )

        return [(r.memory.id, r.similarity_score) for r in results]

    async def get_duplicate_report(
        self,
        content: str,
        embedding: list[float]
    ) -> dict:
        """Get detailed duplicate analysis report.

        Args:
            content: Text content to analyze.
            embedding: Pre-computed embedding vector for the content.

        Returns:
            Dict with keys:
            - is_duplicate (bool): True if above exact threshold
            - is_near_duplicate (bool): True if above near threshold
            - top_match (dict|None): Best matching memory with id, content preview, similarity
            - candidates (list): Top 5 similar memories with id, similarity, content_preview
        """
        # 0.7 is a loose floor so the report includes weaker candidates
        # than either configured threshold.
        results = await self.vector_store.recall(
            embedding,
            limit=5,
            min_similarity=0.7
        )

        if not results:
            return {
                "is_duplicate": False,
                "is_near_duplicate": False,
                "top_match": None,
                "candidates": []
            }

        top = results[0]

        return {
            "is_duplicate": top.similarity_score >= self.exact_threshold,
            "is_near_duplicate": top.similarity_score >= self.near_threshold,
            "top_match": {
                # Truncate long content to a 200-char preview.
                "content": top.memory.content[:200] + "..." if len(top.memory.content) > 200 else top.memory.content,
                "id": top.memory.id,
                "similarity": top.similarity_score,
            },
            "candidates": [
                {"id": r.memory.id, "similarity": r.similarity_score, "content_preview": r.memory.content[:100]}
                for r in results
            ],
        }
|