tribalmemory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tribalmemory/__init__.py +3 -0
- tribalmemory/a21/__init__.py +38 -0
- tribalmemory/a21/config/__init__.py +20 -0
- tribalmemory/a21/config/providers.py +104 -0
- tribalmemory/a21/config/system.py +184 -0
- tribalmemory/a21/container/__init__.py +8 -0
- tribalmemory/a21/container/container.py +212 -0
- tribalmemory/a21/providers/__init__.py +32 -0
- tribalmemory/a21/providers/base.py +241 -0
- tribalmemory/a21/providers/deduplication.py +99 -0
- tribalmemory/a21/providers/lancedb.py +232 -0
- tribalmemory/a21/providers/memory.py +128 -0
- tribalmemory/a21/providers/mock.py +54 -0
- tribalmemory/a21/providers/openai.py +151 -0
- tribalmemory/a21/providers/timestamp.py +88 -0
- tribalmemory/a21/system.py +293 -0
- tribalmemory/cli.py +298 -0
- tribalmemory/interfaces.py +306 -0
- tribalmemory/mcp/__init__.py +9 -0
- tribalmemory/mcp/__main__.py +6 -0
- tribalmemory/mcp/server.py +484 -0
- tribalmemory/performance/__init__.py +1 -0
- tribalmemory/performance/benchmarks.py +285 -0
- tribalmemory/performance/corpus_generator.py +171 -0
- tribalmemory/portability/__init__.py +1 -0
- tribalmemory/portability/embedding_metadata.py +320 -0
- tribalmemory/server/__init__.py +9 -0
- tribalmemory/server/__main__.py +6 -0
- tribalmemory/server/app.py +187 -0
- tribalmemory/server/config.py +115 -0
- tribalmemory/server/models.py +206 -0
- tribalmemory/server/routes.py +378 -0
- tribalmemory/services/__init__.py +15 -0
- tribalmemory/services/deduplication.py +115 -0
- tribalmemory/services/embeddings.py +273 -0
- tribalmemory/services/import_export.py +506 -0
- tribalmemory/services/memory.py +275 -0
- tribalmemory/services/vector_store.py +360 -0
- tribalmemory/testing/__init__.py +22 -0
- tribalmemory/testing/embedding_utils.py +110 -0
- tribalmemory/testing/fixtures.py +123 -0
- tribalmemory/testing/metrics.py +256 -0
- tribalmemory/testing/mocks.py +560 -0
- tribalmemory/testing/semantic_expansions.py +91 -0
- tribalmemory/utils.py +23 -0
- tribalmemory-0.1.0.dist-info/METADATA +275 -0
- tribalmemory-0.1.0.dist-info/RECORD +51 -0
- tribalmemory-0.1.0.dist-info/WHEEL +5 -0
- tribalmemory-0.1.0.dist-info/entry_points.txt +3 -0
- tribalmemory-0.1.0.dist-info/licenses/LICENSE +190 -0
- tribalmemory-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""OpenAI Embedding Service.
|
|
2
|
+
|
|
3
|
+
Production embedding service using OpenAI's text-embedding-3-small model.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import math
|
|
8
|
+
import os
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
import httpx
|
|
12
|
+
|
|
13
|
+
from ..interfaces import IEmbeddingService
|
|
14
|
+
from ..utils import normalize_embedding
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class OpenAIEmbeddingService(IEmbeddingService):
    """OpenAI-compatible embedding service.

    Supports OpenAI, Ollama, and any OpenAI-compatible embedding API.

    Features:
    - Async HTTP calls with retry logic
    - Batch embedding support
    - Rate limiting awareness
    - Configurable model, dimensions, and API base URL
    - Local-only mode (Ollama) — no API key needed

    Usage:
        # OpenAI
        service = OpenAIEmbeddingService(api_key="sk-...")

        # Ollama (local)
        service = OpenAIEmbeddingService(
            api_base="http://localhost:11434/v1",
            model="nomic-embed-text",
            dimensions=768,
        )
    """

    DEFAULT_MODEL = "text-embedding-3-small"
    DEFAULT_DIMENSIONS = 1536
    DEFAULT_API_BASE = "https://api.openai.com/v1"
    # Sent as the bearer token when talking to a local service that
    # ignores authentication (e.g., Ollama) — some servers reject an
    # entirely missing Authorization header.
    LOCAL_API_KEY_PLACEHOLDER = "local-no-key-needed"

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: str = DEFAULT_MODEL,
        dimensions: int = DEFAULT_DIMENSIONS,
        max_retries: int = 3,
        timeout_seconds: float = 30.0,
        backoff_base: float = 2.0,
        backoff_max: float = 60.0,
        api_base: Optional[str] = None,
    ):
        """Initialize OpenAI-compatible embedding service.

        Supports OpenAI, Ollama, and any OpenAI-compatible embedding API.

        Args:
            api_key: API key. Falls back to OPENAI_API_KEY env var.
                Not required when api_base points to a local service (e.g., Ollama).
            model: Embedding model to use.
            dimensions: Output embedding dimensions.
            max_retries: Max retry attempts on transient failures.
            timeout_seconds: Request timeout.
            backoff_base: Base for exponential backoff (default 2.0).
            backoff_max: Maximum backoff delay in seconds (default 60.0).
            api_base: Base URL for the embedding API. Defaults to OpenAI.
                For Ollama: "http://localhost:11434/v1"
                For any OpenAI-compatible API: "http://host:port/v1"

        Raises:
            ValueError: If dimensions is out of range, api_base is not a
                valid HTTP(S) URL, or no API key is available for a
                non-local endpoint.

        Security Note:
            API keys are stored in memory and used in HTTP headers. Never log
            the _client object or include it in error reports. For production,
            consider using a secrets manager rather than environment variables.
        """
        # Validate dimensions
        if dimensions < 1 or dimensions > 8192:
            raise ValueError(
                f"Dimensions must be between 1 and 8192, got {dimensions}"
            )

        # Build the API URL from api_base (raises ValueError on bad URLs)
        self.api_url = self._resolve_api_url(api_base)

        # Determine if this is a local (non-OpenAI) service; any explicit
        # non-OpenAI base is treated as local, so no key is required.
        is_local = (
            api_base is not None
            and api_base.strip() != ""
            and "api.openai.com" not in api_base.lower()
        )

        # For local services (non-OpenAI), api_key is optional
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
        self.backoff_base = backoff_base
        self.backoff_max = backoff_max
        if not self.api_key:
            if not is_local:
                raise ValueError(
                    "OpenAI API key required. Pass api_key "
                    "or set OPENAI_API_KEY env var."
                )
            # Use a placeholder for local services (e.g., Ollama)
            self.api_key = self.LOCAL_API_KEY_PLACEHOLDER

        self.model = model
        self.dimensions = dimensions
        self.max_retries = max_retries
        self.timeout_seconds = timeout_seconds

        # Lazily created on first request; recreated if closed.
        self._client: Optional[httpx.AsyncClient] = None

    @staticmethod
    def _resolve_api_url(api_base: Optional[str] = None) -> str:
        """Resolve the full embeddings API URL from an optional base.

        Args:
            api_base: Base URL (e.g., "http://localhost:11434/v1").
                If None, uses OpenAI default.

        Returns:
            Full URL ending in /embeddings.

        Raises:
            ValueError: If api_base is not a valid HTTP(S) URL.
        """
        if api_base is None or api_base.strip() == "":
            return (
                f"{OpenAIEmbeddingService.DEFAULT_API_BASE}"
                "/embeddings"
            )

        base = api_base.strip().rstrip("/")

        # Basic URL validation
        if base and not base.startswith(("http://", "https://")):
            raise ValueError(
                f"api_base must be an HTTP(S) URL, got: {base}"
            )

        # If already ends with /embeddings, use as-is
        if base.endswith("/embeddings"):
            return base

        return f"{base}/embeddings"

    def __repr__(self) -> str:
        """Safe repr that masks API key to prevent accidental logging."""
        return f"OpenAIEmbeddingService(model={self.model!r}, api_key=***)"

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create HTTP client.

        Recreates the client if a previous one was closed, so the service
        remains usable after close() (e.g., across context-manager uses).
        """
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=httpx.Timeout(self.timeout_seconds),
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                }
            )
        return self._client

    async def embed(self, text: str) -> list[float]:
        """Generate embedding for a single text."""
        results = await self.embed_batch([text])
        return results[0]

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts.

        Retries on 429 (honoring a numeric Retry-After, capped at
        backoff_max), 5xx, timeouts, and transport errors with exponential
        backoff. Non-retryable HTTP errors raise immediately.

        Args:
            texts: Texts to embed. An empty list returns an empty list
                without any network call.

        Returns:
            One normalized embedding per input text, in input order.

        Raises:
            RuntimeError: On a non-retryable API error, or after
                max_retries transient failures.
        """
        if not texts:
            return []

        cleaned_texts = [self._clean_text(t) for t in texts]
        client = await self._get_client()

        payload = {
            "model": self.model,
            "input": cleaned_texts,
            "dimensions": self.dimensions,
        }

        last_error: Optional[Exception] = None
        for attempt in range(self.max_retries):
            # Only sleep between attempts, never after the final one —
            # a backoff before raising just delays the caller's error.
            is_last_attempt = attempt == self.max_retries - 1
            try:
                response = await client.post(self.api_url, json=payload)

                if response.status_code == 200:
                    data = response.json()
                    # The API may return items out of order; sort by the
                    # echoed index to match input order.
                    embeddings = sorted(data["data"], key=lambda x: x["index"])
                    return [normalize_embedding(e["embedding"]) for e in embeddings]

                elif response.status_code == 429:
                    # Retry-After may be an HTTP-date rather than seconds
                    # (RFC 9110); fall back to a default delay instead of
                    # crashing, and cap the wait at backoff_max so a bad
                    # header can't stall the caller arbitrarily.
                    try:
                        retry_after = float(response.headers.get("Retry-After", 5))
                    except (TypeError, ValueError):
                        retry_after = 5.0
                    last_error = RuntimeError("rate limited (HTTP 429)")
                    if not is_last_attempt:
                        await asyncio.sleep(min(retry_after, self.backoff_max))
                    continue

                elif response.status_code >= 500:
                    # Record the failure so the terminal error is diagnostic
                    # even when every attempt ends in a 5xx.
                    last_error = RuntimeError(
                        f"server error (HTTP {response.status_code})"
                    )
                    if not is_last_attempt:
                        backoff = min(self.backoff_base ** attempt, self.backoff_max)
                        await asyncio.sleep(backoff)
                    continue

                else:
                    # 4xx other than 429: not retryable. Extract the API's
                    # error message if the body is JSON; otherwise include
                    # a truncated raw body.
                    try:
                        error_detail = (
                            response.json()
                            .get("error", {})
                            .get("message", response.text)
                        )
                    except Exception:
                        error_detail = response.text[:200]
                    raise RuntimeError(
                        f"Embedding API error "
                        f"({response.status_code}): "
                        f"{error_detail}"
                    )

            except httpx.TimeoutException as e:
                last_error = e
                if not is_last_attempt:
                    backoff = min(self.backoff_base ** attempt, self.backoff_max)
                    await asyncio.sleep(backoff)
            except httpx.RequestError as e:
                last_error = e
                if not is_last_attempt:
                    backoff = min(self.backoff_base ** attempt, self.backoff_max)
                    await asyncio.sleep(backoff)

        raise RuntimeError(f"OpenAI API failed after {self.max_retries} retries: {last_error}")

    def similarity(self, a: list[float], b: list[float]) -> float:
        """Calculate cosine similarity between two embeddings.

        Args:
            a: First embedding vector.
            b: Second embedding vector (must match a's length).

        Returns:
            Cosine similarity in [-1, 1]; 0.0 if either vector is all zeros.

        Raises:
            ValueError: If the vectors have different lengths.
        """
        if len(a) != len(b):
            raise ValueError(f"Embedding dimensions don't match: {len(a)} vs {len(b)}")

        dot_product = sum(x * y for x, y in zip(a, b))
        norm_a = math.sqrt(sum(x * x for x in a))
        norm_b = math.sqrt(sum(x * x for x in b))

        # Avoid division by zero for degenerate (all-zero) vectors.
        if norm_a == 0 or norm_b == 0:
            return 0.0

        return dot_product / (norm_a * norm_b)

    def _clean_text(self, text: str) -> str:
        """Clean text for embedding.

        Normalizes whitespace and truncates to fit within token limits.
        Uses encoding-aware truncation to avoid splitting UTF-8 characters.
        """
        cleaned = " ".join(text.split())

        # OpenAI has token limits; truncate very long texts
        # text-embedding-3-small supports 8191 tokens
        # Rough estimate: 4 bytes per token (worst case UTF-8)
        max_bytes = 8191 * 4

        encoded = cleaned.encode('utf-8')
        if len(encoded) > max_bytes:
            # Truncate bytes and decode safely (ignore partial chars)
            cleaned = encoded[:max_bytes].decode('utf-8', errors='ignore')

        return cleaned

    async def close(self):
        """Close the HTTP client."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()
|