tribalmemory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. tribalmemory/__init__.py +3 -0
  2. tribalmemory/a21/__init__.py +38 -0
  3. tribalmemory/a21/config/__init__.py +20 -0
  4. tribalmemory/a21/config/providers.py +104 -0
  5. tribalmemory/a21/config/system.py +184 -0
  6. tribalmemory/a21/container/__init__.py +8 -0
  7. tribalmemory/a21/container/container.py +212 -0
  8. tribalmemory/a21/providers/__init__.py +32 -0
  9. tribalmemory/a21/providers/base.py +241 -0
  10. tribalmemory/a21/providers/deduplication.py +99 -0
  11. tribalmemory/a21/providers/lancedb.py +232 -0
  12. tribalmemory/a21/providers/memory.py +128 -0
  13. tribalmemory/a21/providers/mock.py +54 -0
  14. tribalmemory/a21/providers/openai.py +151 -0
  15. tribalmemory/a21/providers/timestamp.py +88 -0
  16. tribalmemory/a21/system.py +293 -0
  17. tribalmemory/cli.py +298 -0
  18. tribalmemory/interfaces.py +306 -0
  19. tribalmemory/mcp/__init__.py +9 -0
  20. tribalmemory/mcp/__main__.py +6 -0
  21. tribalmemory/mcp/server.py +484 -0
  22. tribalmemory/performance/__init__.py +1 -0
  23. tribalmemory/performance/benchmarks.py +285 -0
  24. tribalmemory/performance/corpus_generator.py +171 -0
  25. tribalmemory/portability/__init__.py +1 -0
  26. tribalmemory/portability/embedding_metadata.py +320 -0
  27. tribalmemory/server/__init__.py +9 -0
  28. tribalmemory/server/__main__.py +6 -0
  29. tribalmemory/server/app.py +187 -0
  30. tribalmemory/server/config.py +115 -0
  31. tribalmemory/server/models.py +206 -0
  32. tribalmemory/server/routes.py +378 -0
  33. tribalmemory/services/__init__.py +15 -0
  34. tribalmemory/services/deduplication.py +115 -0
  35. tribalmemory/services/embeddings.py +273 -0
  36. tribalmemory/services/import_export.py +506 -0
  37. tribalmemory/services/memory.py +275 -0
  38. tribalmemory/services/vector_store.py +360 -0
  39. tribalmemory/testing/__init__.py +22 -0
  40. tribalmemory/testing/embedding_utils.py +110 -0
  41. tribalmemory/testing/fixtures.py +123 -0
  42. tribalmemory/testing/metrics.py +256 -0
  43. tribalmemory/testing/mocks.py +560 -0
  44. tribalmemory/testing/semantic_expansions.py +91 -0
  45. tribalmemory/utils.py +23 -0
  46. tribalmemory-0.1.0.dist-info/METADATA +275 -0
  47. tribalmemory-0.1.0.dist-info/RECORD +51 -0
  48. tribalmemory-0.1.0.dist-info/WHEEL +5 -0
  49. tribalmemory-0.1.0.dist-info/entry_points.txt +3 -0
  50. tribalmemory-0.1.0.dist-info/licenses/LICENSE +190 -0
  51. tribalmemory-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,54 @@
1
+ """Mock providers for testing."""
2
+
3
+ from datetime import datetime
4
+ from typing import Optional
5
+
6
+ from .base import (
7
+ EmbeddingProvider,
8
+ StorageProvider,
9
+ ProviderHealth,
10
+ ProviderStatus,
11
+ )
12
+ from ..config.providers import EmbeddingConfig
13
+ from ...testing.embedding_utils import hash_to_embedding
14
+
15
+
16
class MockEmbeddingProvider(EmbeddingProvider[EmbeddingConfig]):
    """Embedding provider stand-in for tests.

    Vectors are produced by the shared ``hash_to_embedding`` utility instead
    of a remote model, so this class performs no network I/O of its own.
    """

    def __init__(self, config: EmbeddingConfig):
        super().__init__(config)

    @property
    def model_name(self) -> str:
        """Fixed identifier reported for the mock model."""
        return "mock-embedding"

    @property
    def dimensions(self) -> int:
        """Embedding width, read from the configuration."""
        return self.config.dimensions

    async def initialize(self) -> None:
        """Flag the provider as initialized; there is nothing to set up."""
        self._initialized = True

    async def shutdown(self) -> None:
        """Clear the initialized flag."""
        self._initialized = False

    async def health_check(self) -> ProviderHealth:
        """Report a healthy status with a constant nominal latency."""
        report = ProviderHealth(
            status=ProviderStatus.HEALTHY,
            latency_ms=0.1,
            message="Mock provider always healthy",
        )
        return report

    async def embed(self, text: str) -> list[float]:
        """Return the hash-derived embedding for a single text."""
        vector = self._hash_to_embedding(text)
        return vector

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Return one hash-derived embedding per input text, in input order."""
        vectors: list[list[float]] = []
        for item in texts:
            vectors.append(self._hash_to_embedding(item))
        return vectors

    def _hash_to_embedding(self, text: str) -> list[float]:
        """Delegate to the shared ``hash_to_embedding`` helper.

        Going through the shared utility keeps this mock consistent with the
        other mock implementations that use the same helper.
        """
        width = self.dimensions
        return hash_to_embedding(text, width)
@@ -0,0 +1,151 @@
1
+ """OpenAI Embedding Provider."""
2
+
3
+ import asyncio
4
+ from datetime import datetime
5
+ from typing import Optional
6
+ import httpx
7
+
8
+ from .base import EmbeddingProvider, ProviderHealth, ProviderStatus
9
+ from ..config.providers import EmbeddingConfig
10
+ from ...utils import normalize_embedding
11
+
12
+
13
class OpenAIEmbeddingProvider(EmbeddingProvider[EmbeddingConfig]):
    """Embedding provider that calls the OpenAI embeddings HTTP endpoint.

    The provider owns a single ``httpx.AsyncClient`` created in
    ``initialize()`` and closed in ``shutdown()``. Requests are retried on
    rate limiting (429), server errors (5xx) and transport failures.
    """

    API_URL = "https://api.openai.com/v1/embeddings"

    # Fallback wait (seconds) when a 429 response has no usable Retry-After.
    _DEFAULT_RETRY_AFTER_SECONDS = 5.0

    def __init__(self, config: EmbeddingConfig):
        super().__init__(config)
        self._client: Optional[httpx.AsyncClient] = None

    def __repr__(self) -> str:
        """Safe repr that masks API key to prevent accidental logging."""
        return f"OpenAIEmbeddingProvider(model={self.config.model!r}, api_key=***)"

    @property
    def dimensions(self) -> int:
        """Embedding width requested from the API, read from the config."""
        return self.config.dimensions

    @property
    def model_name(self) -> str:
        """Model identifier sent with every request, read from the config."""
        return self.config.model

    async def initialize(self) -> None:
        """Create the async HTTP client used for API requests.

        Calling this again on an already-initialized provider closes the
        previous client first so its connection pool is not leaked.

        Raises:
            ValueError: If API key is not configured
        """
        if not self.config.api_key:
            raise ValueError("OpenAI API key required")

        if self._client is not None:
            await self.shutdown()

        self._client = httpx.AsyncClient(
            timeout=httpx.Timeout(self.config.timeout_seconds),
            headers={
                "Authorization": f"Bearer {self.config.api_key}",
                "Content-Type": "application/json",
            },
        )
        self._initialized = True

    async def shutdown(self) -> None:
        """Close the HTTP client (if any) and mark the provider as stopped."""
        if self._client:
            await self._client.aclose()
            self._client = None
        self._initialized = False

    async def health_check(self) -> ProviderHealth:
        """Probe the API with a tiny embedding request and time it.

        Returns:
            ``UNAVAILABLE`` when the client is missing or the probe raises,
            otherwise ``HEALTHY`` with the measured latency in milliseconds.
        """
        if not self._client:
            return ProviderHealth(
                status=ProviderStatus.UNAVAILABLE,
                message="Client not initialized"
            )

        # The event loop clock is monotonic, so the measured latency cannot
        # go negative or jump when the wall clock is adjusted.
        clock = asyncio.get_running_loop().time
        try:
            started = clock()
            await self.embed("health check")
            latency = (clock() - started) * 1000
            return ProviderHealth(
                status=ProviderStatus.HEALTHY,
                latency_ms=latency
            )
        except Exception as e:
            # Health checks report failures instead of raising them.
            return ProviderHealth(
                status=ProviderStatus.UNAVAILABLE,
                message=str(e)
            )

    async def embed(self, text: str) -> list[float]:
        """Embed a single text by delegating to ``embed_batch``."""
        results = await self.embed_batch([text])
        return results[0]

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed several texts with one API request, retrying on failure.

        Args:
            texts: Texts to embed; an empty list short-circuits to ``[]``.

        Returns:
            One normalized embedding per input text, ordered by the
            ``index`` field of the API response.

        Raises:
            RuntimeError: If the provider is not initialized, the API
                returns a non-retryable error status, or every attempt
                fails.
        """
        if not texts:
            return []

        if not self._client:
            raise RuntimeError("Provider not initialized")

        payload = {
            "model": self.config.model,
            "input": [self._clean_text(t) for t in texts],
            "dimensions": self.config.dimensions,
        }

        max_retries = self.config.max_retries
        # Tracks the most recent retryable failure (exception or description)
        # so the final error message always names a cause.
        last_error: object = None
        for attempt in range(max_retries):
            try:
                response = await self._client.post(self.API_URL, json=payload)
            except httpx.RequestError as e:
                # httpx.TimeoutException is a RequestError subclass, so one
                # handler covers both timeouts and other transport failures.
                last_error = e
                await asyncio.sleep(self._backoff_delay(attempt))
                continue

            status = response.status_code
            if status == 200:
                data = response.json()
                embeddings = sorted(data["data"], key=lambda x: x["index"])
                return [normalize_embedding(e["embedding"]) for e in embeddings]

            if status == 429:
                last_error = "rate limited (HTTP 429)"
                await asyncio.sleep(self._retry_after_seconds(response))
                continue

            if status >= 500:
                last_error = f"server error (HTTP {status})"
                await asyncio.sleep(self._backoff_delay(attempt))
                continue

            error = self._error_message(response)
            raise RuntimeError(f"OpenAI API error ({status}): {error}")

        raise RuntimeError(f"OpenAI API failed after {max_retries} retries: {last_error}")

    def _backoff_delay(self, attempt: int) -> float:
        """Exponential backoff for ``attempt``, capped at ``backoff_max``."""
        return min(self.config.backoff_base ** attempt, self.config.backoff_max)

    @classmethod
    def _retry_after_seconds(cls, response) -> float:
        """Read the Retry-After delay of a 429 response, in seconds.

        Falls back to the default wait when the header is absent, is not a
        plain number (for example an HTTP-date), or is negative/non-finite.
        """
        raw = response.headers.get("Retry-After")
        try:
            delay = float(raw)
        except (TypeError, ValueError):
            return cls._DEFAULT_RETRY_AFTER_SECONDS
        # NaN fails both comparisons; infinity fails the upper bound.
        if 0 <= delay < float("inf"):
            return delay
        return cls._DEFAULT_RETRY_AFTER_SECONDS

    @staticmethod
    def _error_message(response) -> str:
        """Extract ``error.message`` from an error body, else the raw text.

        Non-JSON or unexpectedly shaped bodies must not mask the HTTP error
        being reported, so any parsing problem falls back to the body text.
        """
        try:
            body = response.json()
        except ValueError:
            return response.text
        if isinstance(body, dict):
            error = body.get("error")
            if isinstance(error, dict):
                return error.get("message", response.text)
        return response.text

    def _clean_text(self, text: str) -> str:
        """Collapse whitespace runs and truncate overly long input.

        The byte budget is ``8191 * 4``; presumably this approximates a
        token limit at about four bytes per token -- TODO confirm. Decoding
        with ``errors='ignore'`` drops a multi-byte character cut in half by
        the truncation.
        """
        cleaned = " ".join(text.split())
        max_bytes = 8191 * 4
        encoded = cleaned.encode('utf-8')
        if len(encoded) > max_bytes:
            cleaned = encoded[:max_bytes].decode('utf-8', errors='ignore')
        return cleaned
@@ -0,0 +1,88 @@
1
+ """Timestamp providers."""
2
+
3
+ import hashlib
4
+ from datetime import datetime
5
+ from typing import Optional
6
+
7
+ from .base import TimestampProvider, ProviderHealth, ProviderStatus
8
+ from ..config.providers import TimestampConfig
9
+
10
+
11
class RFC3161TimestampProvider(TimestampProvider[TimestampConfig]):
    """Placeholder for an RFC 3161 Time Stamp Authority provider.

    TODO: Implement actual RFC 3161 integration.
    Only the lifecycle methods work today; ``timestamp`` and ``verify``
    raise ``NotImplementedError``.
    """

    async def initialize(self) -> None:
        """Mark the provider initialized once a TSA URL is configured.

        Raises:
            ValueError: If the configuration has no TSA URL.
        """
        if self.config.tsa_url:
            self._initialized = True
            return
        raise ValueError("TSA URL required for RFC 3161 provider")

    async def shutdown(self) -> None:
        """Clear the initialized flag."""
        self._initialized = False

    async def health_check(self) -> ProviderHealth:
        """Report HEALTHY with the configured TSA URL in the message.

        TODO: Actually ping the TSA -- no request is made at present.
        """
        description = f"RFC 3161 TSA at {self.config.tsa_url}"
        return ProviderHealth(status=ProviderStatus.HEALTHY, message=description)

    async def timestamp(self, data: bytes) -> bytes:
        """Not implemented yet; always raises ``NotImplementedError``."""
        # TODO: Implement actual RFC 3161 timestamp request
        raise NotImplementedError("RFC 3161 implementation pending")

    async def verify(self, data: bytes, token: bytes) -> tuple[bool, Optional[datetime]]:
        """Not implemented yet; always raises ``NotImplementedError``."""
        # TODO: Implement actual RFC 3161 verification
        raise NotImplementedError("RFC 3161 implementation pending")
40
+
41
+
42
class MockTimestampProvider(TimestampProvider[TimestampConfig]):
    """In-process timestamp provider for tests.

    Tokens are UTF-8 strings of the form ``MOCK_TSA|<iso time>|<digest>``
    where the digest is the first 16 hex characters of the SHA-256 of the
    data.
    """

    def __init__(self, config: TimestampConfig):
        super().__init__(config)
        # Issued tokens mapped to their issue time; emptied on shutdown.
        self._timestamps: dict[bytes, datetime] = {}

    async def initialize(self) -> None:
        """Flag the provider as initialized."""
        self._initialized = True

    async def shutdown(self) -> None:
        """Forget all issued tokens and clear the initialized flag."""
        self._timestamps.clear()
        self._initialized = False

    async def health_check(self) -> ProviderHealth:
        """Report a healthy status unconditionally."""
        report = ProviderHealth(
            status=ProviderStatus.HEALTHY,
            message="Mock timestamp provider",
        )
        return report

    async def timestamp(self, data: bytes) -> bytes:
        """Issue a token binding ``data`` to the current UTC time."""
        issued_at = datetime.utcnow()
        digest = hashlib.sha256(data).hexdigest()[:16]
        issued_token = f"MOCK_TSA|{issued_at.isoformat()}|{digest}".encode()
        self._timestamps[issued_token] = issued_at
        return issued_token

    async def verify(self, data: bytes, token: bytes) -> tuple[bool, Optional[datetime]]:
        """Check that ``token`` is well formed and matches ``data``.

        Returns:
            ``(True, issue_time)`` when the token parses and its digest
            matches the data, otherwise ``(False, None)``. Any exception
            raised while parsing is treated as a failed verification.
        """
        failure: tuple[bool, Optional[datetime]] = (False, None)
        try:
            text = token.decode()
            if not text.startswith("MOCK_TSA|"):
                return failure

            fields = text.split("|")
            if len(fields) != 3:
                return failure

            _, issued_iso, claimed_digest = fields
            if hashlib.sha256(data).hexdigest()[:16] != claimed_digest:
                return failure

            return True, datetime.fromisoformat(issued_iso)
        except Exception:
            return failure
@@ -0,0 +1,293 @@
1
+ """Memory System - High-level API for A2.1.
2
+
3
+ This is the main entry point for interacting with tribal memory.
4
+ It provides a clean, high-level interface while delegating to
5
+ the underlying providers through the container.
6
+ """
7
+
8
+ import uuid
9
+ from datetime import datetime
10
+ from typing import Any, Optional
11
+
12
+ from .config import SystemConfig
13
+ from .container import Container
14
+ from ..interfaces import MemoryEntry, MemorySource, RecallResult, StoreResult
15
+
16
+
17
class MemorySystem:
    """High-level memory system API.

    This class provides a simple, clean interface for memory operations
    while managing all the underlying complexity through the container.

    Usage:
        config = SystemConfig.from_env()
        system = MemorySystem(config)

        async with system:
            await system.remember("Important fact")
            results = await system.recall("What was that fact?")

    Or manually:
        system = MemorySystem(config)
        await system.start()
        try:
            await system.remember("Important fact")
        finally:
            await system.stop()
    """

    def __init__(self, config: SystemConfig):
        """Initialize memory system.

        Args:
            config: System configuration
        """
        self.config = config
        self._container = Container(config)
        self._started = False

    async def start(self) -> None:
        """Start the memory system.

        Does nothing if already started.

        Raises:
            ValueError: If ``config.validate()`` reports any errors.
        """
        if self._started:
            return

        # Validate config
        errors = self.config.validate()
        if errors:
            raise ValueError(f"Invalid configuration: {errors}")

        await self._container.initialize()
        self._started = True

    async def stop(self) -> None:
        """Stop the memory system. Does nothing if not started."""
        if not self._started:
            return
        await self._container.shutdown()
        self._started = False

    async def remember(
        self,
        content: str,
        source_type: MemorySource = MemorySource.AUTO_CAPTURE,
        context: Optional[str] = None,
        tags: Optional[list[str]] = None,
        skip_dedup: bool = False,
    ) -> StoreResult:
        """Store a new memory.

        Args:
            content: The memory content
            source_type: How this memory was captured
            context: Additional context about capture
            tags: Tags for categorization
            skip_dedup: Skip duplicate checking

        Returns:
            StoreResult with success status. Empty content, an embedding
            failure and a detected duplicate are all reported as an
            unsuccessful result rather than raised.

        Raises:
            RuntimeError: If the system has not been started.
        """
        self._ensure_started()

        # Validate
        if not content or not content.strip():
            return StoreResult(success=False, error="Empty content not allowed")

        content = content.strip()

        # Generate embedding
        try:
            embedding = await self._container.embedding.embed(content)
        except Exception as e:
            return StoreResult(success=False, error=f"Embedding failed: {e}")

        # Check for duplicates
        if not skip_dedup and self._container.deduplication:
            is_dup, dup_id = await self._container.deduplication.is_duplicate(
                content, embedding
            )
            if is_dup:
                return StoreResult(success=False, duplicate_of=dup_id)

        # One timestamp for both fields so a new entry never starts out with
        # updated_at later than created_at.
        now = datetime.utcnow()
        entry = MemoryEntry(
            id=str(uuid.uuid4()),
            content=content,
            embedding=embedding,
            source_instance=self.config.instance_id,
            source_type=source_type,
            created_at=now,
            updated_at=now,
            tags=tags or [],
            context=context,
            confidence=1.0,
        )

        return await self._container.storage.store(entry)

    async def recall(
        self,
        query: str,
        limit: int = 5,
        min_relevance: float = 0.7,
        tags: Optional[list[str]] = None,
    ) -> list[RecallResult]:
        """Recall relevant memories.

        Args:
            query: Natural language query
            limit: Maximum results
            min_relevance: Minimum similarity score
            tags: Filter by tags

        Returns:
            The storage provider's results with superseded entries removed.
            An empty list is returned when the query cannot be embedded.

        Raises:
            RuntimeError: If the system has not been started.
        """
        self._ensure_started()

        try:
            query_embedding = await self._container.embedding.embed(query)
        except Exception as exc:
            # Recall stays best-effort, but the failure is logged so an
            # empty result can be told apart from "nothing matched".
            import logging

            logging.getLogger(__name__).warning(
                "Recall aborted: query embedding failed: %s", exc
            )
            return []

        filters = {"tags": tags} if tags else None

        results = await self._container.storage.recall(
            query_embedding,
            limit=limit,
            min_similarity=min_relevance,
            filters=filters,
        )

        return self._filter_superseded(results)

    async def correct(
        self,
        original_id: str,
        corrected_content: str,
        context: Optional[str] = None,
    ) -> StoreResult:
        """Store a correction to an existing memory.

        The correction is a new entry that supersedes the original and
        inherits a copy of its tags.

        Args:
            original_id: ID of memory being corrected
            corrected_content: The corrected information
            context: Why this correction was made

        Returns:
            StoreResult for the correction entry. Empty content, a missing
            original and an embedding failure are reported as an
            unsuccessful result rather than raised.

        Raises:
            RuntimeError: If the system has not been started.
        """
        self._ensure_started()

        # Same content rules as remember(): reject blank text, store stripped.
        if not corrected_content or not corrected_content.strip():
            return StoreResult(success=False, error="Empty content not allowed")

        corrected_content = corrected_content.strip()

        # Verify original exists
        original = await self._container.storage.get(original_id)
        if not original:
            return StoreResult(success=False, error=f"Original memory {original_id} not found")

        # Generate embedding
        try:
            embedding = await self._container.embedding.embed(corrected_content)
        except Exception as e:
            return StoreResult(success=False, error=f"Embedding failed: {e}")

        now = datetime.utcnow()
        entry = MemoryEntry(
            id=str(uuid.uuid4()),
            content=corrected_content,
            embedding=embedding,
            source_instance=self.config.instance_id,
            source_type=MemorySource.CORRECTION,
            created_at=now,
            updated_at=now,
            # Copy the list so later edits to one entry's tags cannot leak
            # into the other entry.
            tags=list(original.tags) if original.tags else [],
            context=context or f"Correction of {original_id}",
            confidence=1.0,
            supersedes=original_id,
            related_to=[original_id],
        )

        return await self._container.storage.store(entry)

    async def forget(self, memory_id: str) -> bool:
        """Forget a memory by delegating to the storage provider's delete.

        Args:
            memory_id: ID of memory to forget

        Returns:
            The storage provider's delete result (True if forgotten
            successfully).

        Raises:
            RuntimeError: If the system has not been started.
        """
        self._ensure_started()
        return await self._container.storage.delete(memory_id)

    async def get(self, memory_id: str) -> Optional[MemoryEntry]:
        """Get a specific memory by ID.

        Args:
            memory_id: Memory ID

        Returns:
            MemoryEntry or None if not found

        Raises:
            RuntimeError: If the system has not been started.
        """
        self._ensure_started()
        return await self._container.storage.get(memory_id)

    async def health(self) -> dict[str, Any]:
        """Check system health.

        Unlike the other operations this never raises when the system is
        stopped; it reports the stopped state instead.

        Returns:
            ``{"status": "stopped"}`` when not started, otherwise a dict
            with:
            - status: "running"
            - instance_id: This instance's ID
            - providers: Dict of provider name to status and latency
        """
        if not self._started:
            return {"status": "stopped"}

        health = await self._container.health_check()
        return {
            "status": "running",
            "instance_id": self.config.instance_id,
            "providers": {
                name: {"status": h.status.value, "latency_ms": h.latency_ms}
                for name, h in health.items()
            }
        }

    async def stats(self) -> dict[str, Any]:
        """Get memory statistics.

        Returns:
            Dict with:
            - total_memories: Count reported by the storage provider
            - instance_id: This instance's ID

        Raises:
            RuntimeError: If the system has not been started.
        """
        self._ensure_started()

        total = await self._container.storage.count()

        return {
            "total_memories": total,
            "instance_id": self.config.instance_id,
        }

    def _ensure_started(self) -> None:
        """Raise ``RuntimeError`` unless ``start()`` has completed."""
        if not self._started:
            raise RuntimeError("MemorySystem not started. Call start() first.")

    @staticmethod
    def _filter_superseded(results: list[RecallResult]) -> list[RecallResult]:
        """Remove memories that are superseded by corrections in the result set.

        Only corrections present in ``results`` are considered; an entry
        whose correction was not returned is kept.
        """
        superseded_ids = {
            r.memory.supersedes for r in results if r.memory.supersedes
        }
        if not superseded_ids:
            return results
        return [r for r in results if r.memory.id not in superseded_ids]

    async def __aenter__(self):
        """Start the system and return it for use in ``async with``."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Stop the system on leaving ``async with``; exceptions propagate."""
        await self.stop()