spanforge 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +815 -0
- spanforge/_ansi.py +93 -0
- spanforge/_batch_exporter.py +409 -0
- spanforge/_cli.py +2094 -0
- spanforge/_cli_audit.py +639 -0
- spanforge/_cli_compliance.py +711 -0
- spanforge/_cli_cost.py +243 -0
- spanforge/_cli_ops.py +791 -0
- spanforge/_cli_phase11.py +356 -0
- spanforge/_hooks.py +337 -0
- spanforge/_server.py +1708 -0
- spanforge/_span.py +1036 -0
- spanforge/_store.py +288 -0
- spanforge/_stream.py +664 -0
- spanforge/_trace.py +335 -0
- spanforge/_tracer.py +254 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +469 -0
- spanforge/auto.py +464 -0
- spanforge/baseline.py +335 -0
- spanforge/cache.py +635 -0
- spanforge/compliance.py +325 -0
- spanforge/config.py +532 -0
- spanforge/consent.py +228 -0
- spanforge/consumer.py +377 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1254 -0
- spanforge/cost.py +600 -0
- spanforge/debug.py +548 -0
- spanforge/deprecations.py +205 -0
- spanforge/drift.py +482 -0
- spanforge/egress.py +58 -0
- spanforge/eval.py +648 -0
- spanforge/event.py +1064 -0
- spanforge/exceptions.py +240 -0
- spanforge/explain.py +178 -0
- spanforge/export/__init__.py +69 -0
- spanforge/export/append_only.py +337 -0
- spanforge/export/cloud.py +357 -0
- spanforge/export/datadog.py +497 -0
- spanforge/export/grafana.py +320 -0
- spanforge/export/jsonl.py +195 -0
- spanforge/export/openinference.py +158 -0
- spanforge/export/otel_bridge.py +294 -0
- spanforge/export/otlp.py +811 -0
- spanforge/export/otlp_bridge.py +233 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/siem_schema.py +98 -0
- spanforge/export/siem_splunk.py +264 -0
- spanforge/export/siem_syslog.py +212 -0
- spanforge/export/webhook.py +299 -0
- spanforge/exporters/__init__.py +30 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/exporters/sqlite.py +142 -0
- spanforge/gate.py +1150 -0
- spanforge/governance.py +181 -0
- spanforge/hitl.py +295 -0
- spanforge/http.py +187 -0
- spanforge/inspect.py +427 -0
- spanforge/integrations/__init__.py +45 -0
- spanforge/integrations/_pricing.py +280 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/azure_openai.py +133 -0
- spanforge/integrations/bedrock.py +292 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +351 -0
- spanforge/integrations/groq.py +442 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/langgraph.py +306 -0
- spanforge/integrations/llamaindex.py +373 -0
- spanforge/integrations/ollama.py +287 -0
- spanforge/integrations/openai.py +368 -0
- spanforge/integrations/together.py +483 -0
- spanforge/io.py +214 -0
- spanforge/lint.py +322 -0
- spanforge/metrics.py +417 -0
- spanforge/metrics_export.py +343 -0
- spanforge/migrate.py +402 -0
- spanforge/model_registry.py +278 -0
- spanforge/models.py +389 -0
- spanforge/namespaces/__init__.py +254 -0
- spanforge/namespaces/audit.py +256 -0
- spanforge/namespaces/cache.py +237 -0
- spanforge/namespaces/chain.py +77 -0
- spanforge/namespaces/confidence.py +72 -0
- spanforge/namespaces/consent.py +92 -0
- spanforge/namespaces/cost.py +179 -0
- spanforge/namespaces/decision.py +143 -0
- spanforge/namespaces/diff.py +157 -0
- spanforge/namespaces/drift.py +80 -0
- spanforge/namespaces/eval_.py +251 -0
- spanforge/namespaces/feedback.py +241 -0
- spanforge/namespaces/fence.py +193 -0
- spanforge/namespaces/guard.py +105 -0
- spanforge/namespaces/hitl.py +91 -0
- spanforge/namespaces/latency.py +72 -0
- spanforge/namespaces/prompt.py +190 -0
- spanforge/namespaces/redact.py +173 -0
- spanforge/namespaces/retrieval.py +379 -0
- spanforge/namespaces/runtime_governance.py +494 -0
- spanforge/namespaces/template.py +208 -0
- spanforge/namespaces/tool_call.py +77 -0
- spanforge/namespaces/trace.py +1029 -0
- spanforge/normalizer.py +171 -0
- spanforge/plugins.py +82 -0
- spanforge/presidio_backend.py +349 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +418 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +914 -0
- spanforge/regression.py +192 -0
- spanforge/runtime_policy.py +159 -0
- spanforge/sampling.py +511 -0
- spanforge/schema.py +183 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/sdk/__init__.py +625 -0
- spanforge/sdk/_base.py +584 -0
- spanforge/sdk/_base.pyi +71 -0
- spanforge/sdk/_exceptions.py +1096 -0
- spanforge/sdk/_types.py +2184 -0
- spanforge/sdk/alert.py +1514 -0
- spanforge/sdk/alert.pyi +56 -0
- spanforge/sdk/audit.py +1196 -0
- spanforge/sdk/audit.pyi +67 -0
- spanforge/sdk/cec.py +1215 -0
- spanforge/sdk/cec.pyi +37 -0
- spanforge/sdk/config.py +641 -0
- spanforge/sdk/config.pyi +55 -0
- spanforge/sdk/enterprise.py +714 -0
- spanforge/sdk/enterprise.pyi +79 -0
- spanforge/sdk/explain.py +170 -0
- spanforge/sdk/fallback.py +432 -0
- spanforge/sdk/feedback.py +351 -0
- spanforge/sdk/gate.py +874 -0
- spanforge/sdk/gate.pyi +51 -0
- spanforge/sdk/identity.py +2114 -0
- spanforge/sdk/identity.pyi +47 -0
- spanforge/sdk/lineage.py +175 -0
- spanforge/sdk/observe.py +1065 -0
- spanforge/sdk/observe.pyi +50 -0
- spanforge/sdk/operator.py +338 -0
- spanforge/sdk/pii.py +1473 -0
- spanforge/sdk/pii.pyi +119 -0
- spanforge/sdk/pipelines.py +458 -0
- spanforge/sdk/pipelines.pyi +39 -0
- spanforge/sdk/policy.py +930 -0
- spanforge/sdk/rag.py +594 -0
- spanforge/sdk/rbac.py +280 -0
- spanforge/sdk/registry.py +430 -0
- spanforge/sdk/registry.pyi +46 -0
- spanforge/sdk/scope.py +279 -0
- spanforge/sdk/secrets.py +293 -0
- spanforge/sdk/secrets.pyi +25 -0
- spanforge/sdk/security.py +560 -0
- spanforge/sdk/security.pyi +57 -0
- spanforge/sdk/trust.py +472 -0
- spanforge/sdk/trust.pyi +41 -0
- spanforge/secrets.py +799 -0
- spanforge/signing.py +1179 -0
- spanforge/stats.py +100 -0
- spanforge/stream.py +560 -0
- spanforge/testing.py +378 -0
- spanforge/testing_mocks.py +1052 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +300 -0
- spanforge/validate.py +379 -0
- spanforge-1.0.0.dist-info/METADATA +1509 -0
- spanforge-1.0.0.dist-info/RECORD +174 -0
- spanforge-1.0.0.dist-info/WHEEL +4 -0
- spanforge-1.0.0.dist-info/entry_points.txt +5 -0
- spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/cache.py
ADDED
|
@@ -0,0 +1,635 @@
|
|
|
1
|
+
"""spanforge.cache — Semantic cache engine for LLM prompt deduplication.
|
|
2
|
+
|
|
3
|
+
Deduplicates LLM calls by comparing the cosine similarity of incoming prompts
|
|
4
|
+
to previously cached prompts. When a prompt is *similar enough* (controlled
|
|
5
|
+
by ``similarity_threshold``) the cached response is returned immediately.
|
|
6
|
+
|
|
7
|
+
Public API
|
|
8
|
+
----------
|
|
9
|
+
SemanticCache Main cache class.
|
|
10
|
+
InMemoryBackend LRU in-process backend (default).
|
|
11
|
+
SQLiteBackend Persistent stdlib sqlite3 backend.
|
|
12
|
+
RedisBackend Distributed Redis backend (requires ``pip install redis``).
|
|
13
|
+
CacheEntry Dataclass returned by backend inspection.
|
|
14
|
+
CacheBackendError Base exception for backend failures.
|
|
15
|
+
cached ``@cached`` decorator for async and sync functions.
|
|
16
|
+
|
|
17
|
+
All payload event classes (``CacheHitPayload``, ``CacheMissPayload``, etc.)
|
|
18
|
+
are re-exported from ``spanforge.namespaces.cache``.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import functools
|
|
24
|
+
import hashlib
|
|
25
|
+
import math
|
|
26
|
+
import sqlite3
|
|
27
|
+
import threading
|
|
28
|
+
import time
|
|
29
|
+
from collections import OrderedDict
|
|
30
|
+
from dataclasses import dataclass, field
|
|
31
|
+
from typing import Any, Callable, TypeVar
|
|
32
|
+
|
|
33
|
+
from spanforge.exceptions import LLMSchemaError as SpanForgeError
|
|
34
|
+
|
|
35
|
+
__all__ = [
|
|
36
|
+
"CacheBackendError",
|
|
37
|
+
"CacheEntry",
|
|
38
|
+
"InMemoryBackend",
|
|
39
|
+
"RedisBackend",
|
|
40
|
+
"SQLiteBackend",
|
|
41
|
+
"SemanticCache",
|
|
42
|
+
"cached",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
_F = TypeVar("_F", bound=Callable[..., Any])
|
|
46
|
+
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
# Exceptions
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class CacheBackendError(SpanForgeError):
    """Signal that a cache backend could not complete an operation.

    The exception message is rendered as ``"<backend>: <reason>"``; both
    parts are also kept as attributes so callers can inspect them.

    Attributes:
        backend: Name of the backend class that failed, e.g. ``"SQLiteBackend"``.
        reason: Human-readable description of what went wrong.
    """

    def __init__(self, backend: str, reason: str) -> None:
        message = f"{backend}: {reason}"
        super().__init__(message)
        self.backend, self.reason = backend, reason
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
# CacheEntry
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class CacheEntry:
    """A single cached record returned by backend inspection methods.

    Backends store and return these objects; ``SemanticCache`` builds one per
    ``set()`` call and compares ``embedding`` against incoming prompts.
    """

    # Identifier of the record inside its namespace (backends key on it).
    key_hash: str
    # The cached payload handed back on a hit.
    value: str
    # Vector the prompt was embedded to; compared via cosine similarity.
    embedding: list[float]
    created_at: float  # Unix timestamp
    # Lifetime in seconds, measured from ``created_at``.
    ttl_seconds: int
    # Logical partition the record belongs to.
    namespace: str
    # Free-form labels used for tag-based invalidation.
    tags: list[str] = field(default_factory=list)
    similarity_score: float = 1.0  # 1.0 for direct hit, <1 for semantic match
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
# Abstract CacheBackend protocol (duck-typed — no ABC required)
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class _CacheBackendBase:
    """Shared base for all backend implementations.

    Every method here raises ``NotImplementedError``; concrete backends
    override all five operations.
    """

    def put(self, key_hash: str, entry: CacheEntry) -> None:
        """Store *entry* under *key_hash* (the namespace is taken from the entry)."""
        raise NotImplementedError

    def get_all(self, namespace: str) -> list[CacheEntry]:
        """Return every entry stored in *namespace*."""
        raise NotImplementedError

    def remove(self, key_hash: str, namespace: str) -> bool:
        """Delete one entry; the bool reports whether something was removed."""
        raise NotImplementedError

    def remove_by_tag(self, tag: str, namespace: str) -> list[str]:
        """Return key_hashes removed."""
        raise NotImplementedError

    def clear_namespace(self, namespace: str) -> list[str]:
        """Remove all entries in *namespace*. Return removed key_hashes."""
        raise NotImplementedError
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# ---------------------------------------------------------------------------
|
|
112
|
+
# InMemoryBackend
|
|
113
|
+
# ---------------------------------------------------------------------------
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class InMemoryBackend(_CacheBackendBase):
    """Bounded in-process cache backend; contents vanish with the process.

    Entries live in an ``OrderedDict`` keyed by ``(namespace, key_hash)``.
    Writing an entry moves it to the "newest" end; once the store holds more
    than ``max_size`` entries the oldest ones are dropped. Every operation
    runs under a single ``threading.Lock``.
    """

    def __init__(self, max_size: int = 1024) -> None:
        self._max_size = max_size
        self._lock = threading.Lock()
        # key = (namespace, key_hash)
        self._store: OrderedDict[tuple[str, str], CacheEntry] = OrderedDict()

    def put(self, key_hash: str, entry: CacheEntry) -> None:
        """Insert or overwrite *entry*, then trim the store to ``max_size``."""
        slot = (entry.namespace, key_hash)
        with self._lock:
            store = self._store
            store[slot] = entry
            # A rewritten key counts as the most recent one.
            store.move_to_end(slot)
            while len(store) > self._max_size:
                store.popitem(last=False)

    def get_all(self, namespace: str) -> list[CacheEntry]:
        """Return the entries of *namespace* in store order (oldest first)."""
        matches: list[CacheEntry] = []
        with self._lock:
            for (owner, _unused), record in self._store.items():
                if owner == namespace:
                    matches.append(record)
        return matches

    def remove(self, key_hash: str, namespace: str) -> bool:
        """Drop one entry; return ``True`` when an entry was actually removed."""
        with self._lock:
            dropped = self._store.pop((namespace, key_hash), None)
        return dropped is not None

    def remove_by_tag(self, tag: str, namespace: str) -> list[str]:
        """Drop every entry of *namespace* carrying *tag*; return their key hashes."""
        with self._lock:
            doomed: list[str] = []
            for (owner, digest), record in self._store.items():
                if owner == namespace and tag in record.tags:
                    doomed.append(digest)
            for digest in doomed:
                self._store.pop((namespace, digest), None)
        return doomed

    def clear_namespace(self, namespace: str) -> list[str]:
        """Drop every entry of *namespace*; return the removed key hashes."""
        with self._lock:
            doomed = [digest for owner, digest in self._store if owner == namespace]
            for digest in doomed:
                self._store.pop((namespace, digest), None)
        return doomed
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# ---------------------------------------------------------------------------
|
|
163
|
+
# SQLiteBackend
|
|
164
|
+
# ---------------------------------------------------------------------------
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class SQLiteBackend(_CacheBackendBase):
    """Persistent backend using stdlib ``sqlite3``. No extra dependencies.

    One connection (opened with ``check_same_thread=False``) is shared by all
    callers and guarded by a ``threading.Lock``. Every write runs inside the
    connection's context manager, so it is committed on success and rolled
    back on failure instead of leaving a transaction open.

    Raises:
        CacheBackendError: wraps any ``sqlite3.Error`` raised by an operation.
    """

    _CREATE_SQL = """
        CREATE TABLE IF NOT EXISTS sf_cache (
            namespace   TEXT NOT NULL,
            key_hash    TEXT NOT NULL,
            value       TEXT NOT NULL,
            embedding   TEXT NOT NULL,
            created_at  REAL NOT NULL,
            ttl_seconds INTEGER NOT NULL,
            tags        TEXT NOT NULL DEFAULT '',
            PRIMARY KEY (namespace, key_hash)
        )
    """

    def __init__(self, db_path: str = "spanforge_cache.db") -> None:
        self._db_path = db_path
        self._lock = threading.Lock()
        try:
            self._conn = sqlite3.connect(db_path, check_same_thread=False)
        except sqlite3.Error as exc:
            raise CacheBackendError("SQLiteBackend", str(exc)) from exc
        try:
            with self._conn:
                self._conn.execute(self._CREATE_SQL)
        except sqlite3.Error as exc:
            # Do not leak the handle when the schema cannot be created.
            self._conn.close()
            raise CacheBackendError("SQLiteBackend", str(exc)) from exc

    @staticmethod
    def _split_tags(raw: str) -> list[str]:
        """Decode the comma-joined ``tags`` column, dropping empty items."""
        return [t for t in raw.split(",") if t]

    def close(self) -> None:
        """Close the underlying connection; the backend is unusable afterwards."""
        try:
            with self._lock:
                self._conn.close()
        except sqlite3.Error as exc:
            raise CacheBackendError("SQLiteBackend", str(exc)) from exc

    def put(self, key_hash: str, entry: CacheEntry) -> None:
        """Insert *entry*, replacing any row with the same namespace and key."""
        import json as _json

        row = (
            entry.namespace,
            key_hash,
            entry.value,
            _json.dumps(entry.embedding),
            entry.created_at,
            entry.ttl_seconds,
            # NOTE: tags are stored comma-joined, so a tag that itself contains
            # a comma is read back as several tags.
            ",".join(entry.tags),
        )
        try:
            with self._lock, self._conn:
                self._conn.execute(
                    "INSERT OR REPLACE INTO sf_cache VALUES (?,?,?,?,?,?,?)",
                    row,
                )
        except sqlite3.Error as exc:
            raise CacheBackendError("SQLiteBackend", str(exc)) from exc

    def get_all(self, namespace: str) -> list[CacheEntry]:
        """Return every row of *namespace* as a ``CacheEntry``."""
        import json as _json

        try:
            with self._lock:
                rows = self._conn.execute(
                    "SELECT key_hash,value,embedding,created_at,ttl_seconds,tags "
                    "FROM sf_cache WHERE namespace=?",
                    (namespace,),
                ).fetchall()
        except sqlite3.Error as exc:
            raise CacheBackendError("SQLiteBackend", str(exc)) from exc

        return [
            CacheEntry(
                key_hash=r[0],
                value=r[1],
                embedding=_json.loads(r[2]),
                created_at=r[3],
                ttl_seconds=r[4],
                namespace=namespace,
                tags=self._split_tags(r[5]),
            )
            for r in rows
        ]

    def remove(self, key_hash: str, namespace: str) -> bool:
        """Delete one row; return ``True`` when a row was actually deleted."""
        try:
            with self._lock, self._conn:
                cur = self._conn.execute(
                    "DELETE FROM sf_cache WHERE namespace=? AND key_hash=?",
                    (namespace, key_hash),
                )
                deleted = cur.rowcount > 0
        except sqlite3.Error as exc:
            raise CacheBackendError("SQLiteBackend", str(exc)) from exc
        return deleted

    def remove_by_tag(self, tag: str, namespace: str) -> list[str]:
        """Delete every row of *namespace* tagged *tag*; return their key hashes.

        The lookup and the deletes share one lock acquisition and one
        transaction, and only ``key_hash``/``tags`` are read, so embeddings
        are never decoded here.
        """
        try:
            with self._lock, self._conn:
                rows = self._conn.execute(
                    "SELECT key_hash,tags FROM sf_cache WHERE namespace=?",
                    (namespace,),
                ).fetchall()
                removed = [kh for kh, raw in rows if tag in self._split_tags(raw)]
                self._conn.executemany(
                    "DELETE FROM sf_cache WHERE namespace=? AND key_hash=?",
                    [(namespace, kh) for kh in removed],
                )
        except sqlite3.Error as exc:
            raise CacheBackendError("SQLiteBackend", str(exc)) from exc
        return removed

    def clear_namespace(self, namespace: str) -> list[str]:
        """Delete every row of *namespace*; return the removed key hashes.

        Keys are collected and deleted under the same lock and transaction so
        the returned list matches exactly what was removed.
        """
        try:
            with self._lock, self._conn:
                key_hashes = [
                    r[0]
                    for r in self._conn.execute(
                        "SELECT key_hash FROM sf_cache WHERE namespace=?",
                        (namespace,),
                    )
                ]
                self._conn.execute("DELETE FROM sf_cache WHERE namespace=?", (namespace,))
        except sqlite3.Error as exc:
            raise CacheBackendError("SQLiteBackend", str(exc)) from exc
        return key_hashes
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# ---------------------------------------------------------------------------
|
|
274
|
+
# RedisBackend
|
|
275
|
+
# ---------------------------------------------------------------------------
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
class RedisBackend(_CacheBackendBase):
    """Distributed backend via the optional ``redis`` package.

    Requires: ``pip install redis``

    Each entry is stored as a JSON document under
    ``<prefix><namespace>:<key_hash>`` with the entry's TTL as the Redis
    expiry. Any failure is re-raised as ``CacheBackendError``.
    """

    # Backslash-escape the characters Redis glob patterns treat specially, so
    # prefixes and namespaces are always matched literally.
    _GLOB_ESCAPES = str.maketrans({c: "\\" + c for c in "\\*?[]"})

    def __init__(
        self,
        host: str = "localhost",
        port: int = 6379,
        db: int = 0,
        prefix: str = "spanforge:",
    ) -> None:
        try:
            import redis as _redis
        except ImportError as exc:
            raise CacheBackendError(
                "RedisBackend",
                "redis package not installed — run: pip install redis",
            ) from exc
        self._prefix = prefix
        self._client = _redis.Redis(host=host, port=port, db=db, decode_responses=True)

    def _key(self, namespace: str, key_hash: str) -> str:
        """Build the Redis key for one entry."""
        return f"{self._prefix}{namespace}:{key_hash}"

    def put(self, key_hash: str, entry: CacheEntry) -> None:
        """Serialise *entry* to JSON and store it with ``ttl_seconds`` as expiry."""
        import json as _json

        try:
            k = self._key(entry.namespace, key_hash)
            data = _json.dumps(
                {
                    "value": entry.value,
                    "embedding": entry.embedding,
                    "created_at": entry.created_at,
                    "ttl_seconds": entry.ttl_seconds,
                    "namespace": entry.namespace,
                    "tags": entry.tags,
                }
            )
            self._client.set(k, data, ex=entry.ttl_seconds)
        except Exception as exc:
            raise CacheBackendError("RedisBackend", str(exc)) from exc

    def get_all(self, namespace: str) -> list[CacheEntry]:
        """Return every live entry stored in *namespace*.

        Keys are enumerated with incremental ``SCAN`` rather than the blocking
        ``KEYS`` command. The namespace recorded inside each payload is
        checked so that keys of another namespace sharing the same textual
        prefix (``"a"`` vs ``"a:b"``) are not returned.
        """
        import json as _json

        base = f"{self._prefix}{namespace}:"
        pattern = base.translate(self._GLOB_ESCAPES) + "*"
        try:
            entries: list[CacheEntry] = []
            seen: set[str] = set()  # SCAN may yield the same key more than once
            for k in self._client.scan_iter(match=pattern):
                if k in seen:
                    continue
                seen.add(k)
                raw = self._client.get(k)
                if not raw:
                    # Key expired or was deleted between SCAN and GET.
                    continue
                d = _json.loads(raw)
                if d.get("namespace", namespace) != namespace:
                    continue
                entries.append(
                    CacheEntry(
                        # Everything after "<prefix><namespace>:" is the key hash.
                        key_hash=k[len(base):],
                        value=d["value"],
                        embedding=d["embedding"],
                        created_at=d["created_at"],
                        ttl_seconds=d["ttl_seconds"],
                        namespace=namespace,
                        tags=d.get("tags", []),
                    )
                )
            return entries
        except Exception as exc:
            raise CacheBackendError("RedisBackend", str(exc)) from exc

    def remove(self, key_hash: str, namespace: str) -> bool:
        """Delete one entry; return ``True`` when Redis reports a deleted key."""
        try:
            return bool(self._client.delete(self._key(namespace, key_hash)))
        except Exception as exc:
            raise CacheBackendError("RedisBackend", str(exc)) from exc

    def remove_by_tag(self, tag: str, namespace: str) -> list[str]:
        """Delete every entry of *namespace* tagged *tag*; return their key hashes."""
        removed: list[str] = []
        for entry in self.get_all(namespace):
            if tag in entry.tags:
                self.remove(entry.key_hash, namespace)
                removed.append(entry.key_hash)
        return removed

    def clear_namespace(self, namespace: str) -> list[str]:
        """Delete every entry of *namespace*; return the key hashes that were found."""
        entries = self.get_all(namespace)
        for entry in entries:
            self.remove(entry.key_hash, namespace)
        return [e.key_hash for e in entries]
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
# ---------------------------------------------------------------------------
|
|
373
|
+
# Embedding helper (built-in lightweight character n-gram encoder)
|
|
374
|
+
# ---------------------------------------------------------------------------
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def _default_embedder(text: str) -> list[float]:
    """Lightweight character n-gram embedding for local/test use.

    Not suitable for production semantic search.  Replace with a real
    embedding model via ``SemanticCache(embedder=my_model.encode)``.

    The lower-cased text is split into overlapping 2-character grams; each
    gram is hashed (MD5, non-security use) into one of 128 buckets and the
    resulting count vector is L2-normalised. Text too short to contain a
    bigram is embedded as a single gram so that it still gets a non-zero
    vector and can match itself; the empty string maps to the zero vector.

    Returns:
        A 128-element list of floats with unit length (or all zeros for "").
    """
    size = 128
    vec = [0.0] * size
    text_lower = text.lower()

    grams = [text_lower[i : i + 2] for i in range(len(text_lower) - 1)]
    if not grams and text_lower:
        # Fewer than two characters: fall back to the text itself, otherwise
        # the vector would be all zeros and never reach any threshold.
        grams = [text_lower]

    # Hash each distinct gram only once; repeated grams reuse the bucket.
    bucket_of: dict[str, int] = {}
    for gram in grams:
        idx = bucket_of.get(gram)
        if idx is None:
            digest = hashlib.md5(gram.encode(), usedforsecurity=False).hexdigest()
            idx = int(digest, 16) % size
            bucket_of[gram] = idx
        vec[idx] += 1.0

    # L2 normalise (the ``or 1.0`` keeps the zero vector from dividing by zero)
    norm = math.sqrt(sum(v * v for v in vec)) or 1.0
    return [v / norm for v in vec]
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def _cosine_similarity(a: list[float], b: list[float]) -> float:
    """Compute the cosine of the angle between vectors *a* and *b*.

    Vectors of different length are treated as unrelated and score ``0.0``.
    A zero-magnitude vector has its norm replaced by ``1.0``, so the result
    is ``0.0`` instead of a division error.
    """
    if len(a) != len(b):
        return 0.0
    numerator = sum(left * right for left, right in zip(a, b))
    norms = [math.sqrt(sum(c * c for c in vector)) or 1.0 for vector in (a, b)]
    return numerator / (norms[0] * norms[1])
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
# ---------------------------------------------------------------------------
|
|
407
|
+
# SemanticCache
|
|
408
|
+
# ---------------------------------------------------------------------------
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
class SemanticCache:
    """Semantic LLM prompt cache.

    A lookup embeds the prompt, scans every non-expired entry of the
    namespace and returns the value of the most similar entry when its cosine
    similarity reaches the threshold.

    Args:
        backend:              Storage backend; defaults to ``InMemoryBackend(max_size)``.
        similarity_threshold: Minimum cosine similarity for a hit (0.0–1.0).
        ttl_seconds:          Seconds before an entry is stale.
        namespace:            Logical partition; entries from different namespaces
                              never collide.
        embedder:             Custom embedding function ``(str) -> list[float]``.
                              Defaults to the built-in n-gram encoder.
        max_size:             Capacity for the auto-created ``InMemoryBackend``.
        emit_events:          Emit ``llm.cache.*`` events on hit/miss/write/eviction.
    """

    def __init__(
        self,
        backend: _CacheBackendBase | None = None,
        similarity_threshold: float = 0.92,
        ttl_seconds: int = 3600,
        namespace: str = "default",
        embedder: Callable[[str], list[float]] | None = None,
        max_size: int = 1024,
        emit_events: bool = True,
    ) -> None:
        # Compare against None rather than truthiness: a caller-supplied
        # backend that defines ``__len__``/``__bool__`` may be falsy while
        # empty and must not be silently replaced by the in-memory default.
        self._backend = backend if backend is not None else InMemoryBackend(max_size=max_size)
        self._threshold = similarity_threshold
        self._ttl = ttl_seconds
        self._namespace = namespace
        self._embedder = embedder if embedder is not None else _default_embedder
        self._emit_events = emit_events

    def _hash(self, prompt: str) -> str:
        """Return the first 32 hex characters of the prompt's SHA-256 digest."""
        return hashlib.sha256(prompt.encode()).hexdigest()[:32]

    def _emit(self, event_type: str, payload_dict: dict[str, Any]) -> None:
        """Best-effort event emission; does nothing when ``emit_events`` is off."""
        if not self._emit_events:
            return
        try:
            from spanforge import emit_event

            emit_event(event_type, payload_dict)  # type: ignore[operator]
        except Exception:  # nosec B110
            pass  # Never let event emission crash the cache path

    def get(self, prompt: str) -> str | None:
        """Return the cached response for *prompt*, or ``None`` on miss.

        Emits ``llm.cache.hit`` or ``llm.cache.miss`` when ``emit_events=True``.
        """
        embedding = self._embedder(prompt)
        now = time.time()
        best_score = 0.0
        best_entry: CacheEntry | None = None

        for entry in self._backend.get_all(self._namespace):
            # TTL check: stale entries are skipped, not removed.
            if now - entry.created_at > entry.ttl_seconds:
                continue
            score = _cosine_similarity(embedding, entry.embedding)
            if score > best_score:
                best_score = score
                best_entry = entry

        if best_entry is not None and best_score >= self._threshold:
            self._emit(
                "llm.cache.hit",
                {
                    "key_hash": best_entry.key_hash,
                    "namespace": self._namespace,
                    "similarity_score": best_score,
                },
            )
            return best_entry.value

        self._emit(
            "llm.cache.miss",
            {
                "namespace": self._namespace,
                "similarity_score": best_score,
            },
        )
        return None

    def set(self, prompt: str, value: str, tags: list[str] | None = None) -> None:
        """Store *value* keyed by the embedding of *prompt*.

        Emits ``llm.cache.written`` when ``emit_events=True``.
        """
        key_hash = self._hash(prompt)
        embedding = self._embedder(prompt)
        entry = CacheEntry(
            key_hash=key_hash,
            value=value,
            embedding=embedding,
            created_at=time.time(),
            ttl_seconds=self._ttl,
            namespace=self._namespace,
            # Copy so later mutation of the caller's list cannot retag
            # entries that are already stored.
            tags=list(tags) if tags else [],
        )
        self._backend.put(key_hash, entry)
        self._emit(
            "llm.cache.written",
            {
                "key_hash": key_hash,
                "namespace": self._namespace,
            },
        )

    def _emit_evictions(self, key_hashes: list[str]) -> int:
        """Emit one ``llm.cache.evicted`` event per key; return how many there were."""
        for kh in key_hashes:
            self._emit(
                "llm.cache.evicted",
                {
                    "key_hash": kh,
                    "namespace": self._namespace,
                    "eviction_reason": "manual_invalidation",
                },
            )
        return len(key_hashes)

    def invalidate_by_tag(self, tag: str) -> int:
        """Remove all entries tagged with *tag*. Returns number removed."""
        return self._emit_evictions(self._backend.remove_by_tag(tag, self._namespace))

    def invalidate_all(self) -> int:
        """Flush the entire namespace. Returns number removed."""
        return self._emit_evictions(self._backend.clear_namespace(self._namespace))
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
# ---------------------------------------------------------------------------
|
|
550
|
+
# @cached decorator
|
|
551
|
+
# ---------------------------------------------------------------------------
|
|
552
|
+
|
|
553
|
+
# Keyword names recognised as "the prompt", in lookup priority order.
_PROMPT_ARG_ORDER = ("prompt", "query", "text", "message")
_PROMPT_ARG_NAMES = frozenset(_PROMPT_ARG_ORDER)


def _extract_prompt(args: tuple[Any, ...], kwargs: dict[str, Any]) -> str:
    """Extract the cache key from function arguments.

    Resolution order:

    1. The first of ``prompt``, ``query``, ``text``, ``message`` that is
       passed as a keyword with a ``str`` value. The names are tried in that
       fixed order, so the result does not depend on set iteration order
       (which varies between processes under string-hash randomisation).
    2. The first positional argument that is a ``str``.
    3. ``repr((args, kwargs))`` as a last resort.
    """
    # Prefer known keyword names
    for name in _PROMPT_ARG_ORDER:
        val = kwargs.get(name)
        if isinstance(val, str):
            return val
    # First positional str arg
    for arg in args:
        if isinstance(arg, str):
            return arg
    # Fallback to full repr
    return repr((args, kwargs))
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def cached(
    _func: _F | None = None,
    *,
    threshold: float = 0.92,
    ttl: int = 3600,
    namespace: str = "default",
    backend: _CacheBackendBase | None = None,
    tags: list[str] | None = None,
    emit_events: bool = True,
) -> Any:
    """Decorator that wraps an async or sync function with semantic caching.

    Can be used in bare form or with arguments::

        @cached
        async def ask(prompt: str) -> str: ...

        @cached(threshold=0.95, ttl=7200)
        async def ask(prompt: str) -> str: ...

    The cache key is taken from the call arguments by ``_extract_prompt``.
    Only ``str`` results are written to the cache; any other result is
    returned without being stored. For coroutine functions the cache lookup
    and write themselves run synchronously inside the wrapper.

    Args:
        _func: The function, when used as bare ``@cached``.
        threshold: Minimum cosine similarity for a hit.
        ttl: Entry lifetime in seconds.
        namespace: Cache namespace passed to ``SemanticCache``.
        backend: Storage backend; ``None`` lets ``SemanticCache`` pick its default.
        tags: Tags attached to every entry written by the wrapper.
        emit_events: Forwarded to ``SemanticCache``.

    Returns:
        The wrapped function (bare form) or a decorator (call form).
    """
    _cache = SemanticCache(
        backend=backend,
        similarity_threshold=threshold,
        ttl_seconds=ttl,
        namespace=namespace,
        emit_events=emit_events,
    )
    # Snapshot the tags once; every stored entry then receives its own list
    # instead of all entries sharing (and exposing) the caller's list object.
    frozen_tags = tuple(tags or ())

    def _remember(key: str, result: Any) -> Any:
        """Cache *result* under *key* when it is a string; always return it."""
        if isinstance(result, str):
            _cache.set(key, result, tags=list(frozen_tags))
        return result

    def decorator(fn: _F) -> _F:
        # inspect.iscoroutinefunction replaces asyncio.iscoroutinefunction,
        # which is deprecated as of Python 3.14.
        import inspect

        if inspect.iscoroutinefunction(fn):

            @functools.wraps(fn)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                key = _extract_prompt(args, kwargs)
                hit = _cache.get(key)
                if hit is not None:
                    return hit
                return _remember(key, await fn(*args, **kwargs))

            return async_wrapper  # type: ignore[return-value]

        @functools.wraps(fn)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            key = _extract_prompt(args, kwargs)
            hit = _cache.get(key)
            if hit is not None:
                return hit
            return _remember(key, fn(*args, **kwargs))

        return sync_wrapper  # type: ignore[return-value]

    if _func is not None:
        # Bare @cached usage
        return decorator(_func)
    return decorator
|