tokenmizer 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenmizer/__init__.py +21 -0
- tokenmizer/agents/__init__.py +0 -0
- tokenmizer/analytics/__init__.py +0 -0
- tokenmizer/analytics/engine.py +188 -0
- tokenmizer/api/__init__.py +0 -0
- tokenmizer/api/app.py +958 -0
- tokenmizer/api/rate_limiter.py +110 -0
- tokenmizer/checkpoints/__init__.py +0 -0
- tokenmizer/checkpoints/manager.py +383 -0
- tokenmizer/cli.py +153 -0
- tokenmizer/compression/__init__.py +0 -0
- tokenmizer/compression/engine.py +669 -0
- tokenmizer/compression/output_trimmer.py +95 -0
- tokenmizer/compression/window.py +104 -0
- tokenmizer/config/__init__.py +0 -0
- tokenmizer/config/settings.py +170 -0
- tokenmizer/core/__init__.py +0 -0
- tokenmizer/core/dto.py +196 -0
- tokenmizer/core/errors.py +35 -0
- tokenmizer/core/tokenizer.py +96 -0
- tokenmizer/dashboard/__init__.py +0 -0
- tokenmizer/dashboard/page.py +267 -0
- tokenmizer/filters/__init__.py +0 -0
- tokenmizer/filters/file_intelligence.py +960 -0
- tokenmizer/graph_memory/__init__.py +0 -0
- tokenmizer/graph_memory/decision_tracker.py +225 -0
- tokenmizer/graph_memory/graph.py +1287 -0
- tokenmizer/graph_memory/helpers.py +121 -0
- tokenmizer/graph_memory/hybrid_extractor.py +703 -0
- tokenmizer/graph_memory/types.py +134 -0
- tokenmizer/graph_memory/validator.py +304 -0
- tokenmizer/graph_memory/visualization.py +228 -0
- tokenmizer/mcp/__init__.py +0 -0
- tokenmizer/mcp/server.py +368 -0
- tokenmizer/providers/__init__.py +0 -0
- tokenmizer/providers/providers.py +456 -0
- tokenmizer/security/__init__.py +0 -0
- tokenmizer/security/auth.py +95 -0
- tokenmizer/security/middleware.py +138 -0
- tokenmizer/security/redaction.py +126 -0
- tokenmizer/semantic_cache/__init__.py +0 -0
- tokenmizer/semantic_cache/cache.py +383 -0
- tokenmizer/state/__init__.py +0 -0
- tokenmizer/state/backend.py +137 -0
- tokenmizer/storage/__init__.py +56 -0
- tokenmizer-0.2.4.dist-info/METADATA +529 -0
- tokenmizer-0.2.4.dist-info/RECORD +50 -0
- tokenmizer-0.2.4.dist-info/WHEEL +4 -0
- tokenmizer-0.2.4.dist-info/entry_points.txt +2 -0
- tokenmizer-0.2.4.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Simple token-bucket rate limiter for the proxy endpoint.
|
|
3
|
+
No external deps — pure stdlib.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import logging
|
|
9
|
+
import time
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
|
|
17
|
+
class _Bucket:
|
|
18
|
+
tokens: float
|
|
19
|
+
last_refill: float = field(default_factory=time.monotonic)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RateLimiter:
|
|
23
|
+
"""
|
|
24
|
+
Per-client token-bucket rate limiter.
|
|
25
|
+
Default: 60 requests/minute per API key (or IP if no key).
|
|
26
|
+
|
|
27
|
+
Usage:
|
|
28
|
+
limiter = RateLimiter(rate=60, per_seconds=60, burst=10)
|
|
29
|
+
allowed, retry_after = limiter.check("client-id")
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(
|
|
33
|
+
self,
|
|
34
|
+
rate: int = 60,
|
|
35
|
+
per_seconds: int = 60,
|
|
36
|
+
burst: int = 10,
|
|
37
|
+
max_clients: int = 50_000,
|
|
38
|
+
):
|
|
39
|
+
self.rate = rate # tokens per window
|
|
40
|
+
self.per_seconds = per_seconds # window length
|
|
41
|
+
self.burst = burst # max burst above window rate
|
|
42
|
+
self.max_clients = max_clients # hard cap — prevents unbounded growth
|
|
43
|
+
self.capacity = rate + burst
|
|
44
|
+
self.refill_rate = rate / per_seconds # tokens per second
|
|
45
|
+
self._buckets: dict[str, _Bucket] = defaultdict(
|
|
46
|
+
lambda: _Bucket(tokens=float(self.capacity))
|
|
47
|
+
)
|
|
48
|
+
self._lock = asyncio.Lock()
|
|
49
|
+
# Cleanup: evict stale buckets every 5 minutes
|
|
50
|
+
self._last_cleanup = time.monotonic()
|
|
51
|
+
self._cleanup_interval = 300
|
|
52
|
+
|
|
53
|
+
async def check(self, client_id: str) -> tuple[bool, float]:
|
|
54
|
+
"""
|
|
55
|
+
Returns (allowed, retry_after_seconds).
|
|
56
|
+
retry_after is 0.0 if allowed.
|
|
57
|
+
"""
|
|
58
|
+
async with self._lock:
|
|
59
|
+
now = time.monotonic()
|
|
60
|
+
bucket = self._buckets[client_id]
|
|
61
|
+
|
|
62
|
+
# Refill
|
|
63
|
+
elapsed = now - bucket.last_refill
|
|
64
|
+
bucket.tokens = min(
|
|
65
|
+
self.capacity,
|
|
66
|
+
bucket.tokens + elapsed * self.refill_rate
|
|
67
|
+
)
|
|
68
|
+
bucket.last_refill = now
|
|
69
|
+
|
|
70
|
+
# Evict stale buckets periodically
|
|
71
|
+
if now - self._last_cleanup > self._cleanup_interval:
|
|
72
|
+
self._evict_stale(now)
|
|
73
|
+
|
|
74
|
+
# Hard cap: if still over limit after cleanup, evict oldest entries
|
|
75
|
+
if len(self._buckets) >= self.max_clients:
|
|
76
|
+
# Remove ~10% oldest to avoid thrashing
|
|
77
|
+
evict_count = max(1, self.max_clients // 10)
|
|
78
|
+
oldest = sorted(self._buckets.items(), key=lambda x: x[1].last_refill)[:evict_count]
|
|
79
|
+
for k, _ in oldest:
|
|
80
|
+
del self._buckets[k]
|
|
81
|
+
logger.warning(f"Rate limiter hard cap hit — evicted {evict_count} oldest buckets")
|
|
82
|
+
|
|
83
|
+
if bucket.tokens >= 1.0:
|
|
84
|
+
bucket.tokens -= 1.0
|
|
85
|
+
return True, 0.0
|
|
86
|
+
else:
|
|
87
|
+
# How long until 1 token refills
|
|
88
|
+
retry_after = (1.0 - bucket.tokens) / self.refill_rate
|
|
89
|
+
logger.warning(f"Rate limit hit for client '{client_id}'")
|
|
90
|
+
return False, retry_after
|
|
91
|
+
|
|
92
|
+
def _evict_stale(self, now: float, stale_after: float = 600.0) -> None:
|
|
93
|
+
"""Remove buckets inactive for >10 minutes to prevent memory leak."""
|
|
94
|
+
stale = [k for k, b in self._buckets.items()
|
|
95
|
+
if (now - b.last_refill) > stale_after]
|
|
96
|
+
for k in stale:
|
|
97
|
+
del self._buckets[k]
|
|
98
|
+
if stale:
|
|
99
|
+
logger.debug(f"Rate limiter: evicted {len(stale)} stale buckets")
|
|
100
|
+
self._last_cleanup = now
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# Process-wide limiter instance, built lazily by get_rate_limiter().
_limiter: RateLimiter | None = None


def get_rate_limiter(rate: int = 60, per_seconds: int = 60, burst: int = 10) -> RateLimiter:
    """
    Return the shared RateLimiter, creating it on the first call.

    The arguments only take effect on the call that creates the instance;
    later calls return the existing limiter and ignore them.
    """
    global _limiter
    if _limiter is not None:
        return _limiter
    _limiter = RateLimiter(rate=rate, per_seconds=per_seconds, burst=burst)
    return _limiter
|
|
File without changes
|
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Checkpoint Manager — creates and restores session checkpoints.
|
|
3
|
+
|
|
4
|
+
Key fixes over V3:
|
|
5
|
+
- Extracts from FULL message history, not just last 10
|
|
6
|
+
- Tiered resume blocks (critical / standard / full)
|
|
7
|
+
- Graph diff between checkpoints
|
|
8
|
+
- Accurate token counting via tiktoken
|
|
9
|
+
- SQLite persistence
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import logging
|
|
15
|
+
import time
|
|
16
|
+
import uuid
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import TYPE_CHECKING, Optional
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
import sqlite3
|
|
23
|
+
|
|
24
|
+
from tokenmizer.core.errors import CheckpointPersistError
|
|
25
|
+
from tokenmizer.core.tokenizer import count_tokens
|
|
26
|
+
from tokenmizer.graph_memory.graph import GraphMemory
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class Checkpoint:
    """One saved checkpoint of a session, including its tiered resume texts."""

    # --- identity and bookkeeping ---
    checkpoint_id: str
    session_id: str
    created_at: float
    context_pct: float
    # One of "auto_threshold" | "manual" | "provider_switch"
    trigger: str
    message_count: int

    # --- graph state ---
    # Full graph state at checkpoint time
    graph_snapshot: dict
    # Diff from previous checkpoint
    graph_diff: dict

    # --- resume blocks, smallest to largest ---
    # ~100 tokens — must-know facts
    resume_critical: str
    # ~300 tokens — normal resume
    resume_standard: str
    # ~600 tokens — deep resume
    resume_full: str

    # --- optional metadata ---
    model: str = ""
    next_action: str = ""

    @property
    def resume_tokens(self) -> int:
        """Token count of the standard resume block, as measured by count_tokens."""
        return count_tokens(self.resume_standard)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class CheckpointManager:
    """
    Manages checkpoints for all sessions.
    Stored in SQLite — survives restarts.
    """

    def __init__(self, storage_dir: str = "./checkpoints"):
        """Create ``storage_dir`` if needed and open/initialize ``checkpoints.db`` in it."""
        self._dir = Path(storage_dir)
        self._dir.mkdir(parents=True, exist_ok=True)
        self._db_path = self._dir / "checkpoints.db"
        self._safe_init_db()
        # session_id → snapshot of the last successfully saved checkpoint
        self._prev_snapshots: dict[str, dict] = {}

    def _safe_init_db(self) -> None:
        """
        Initialize DB, deleting corrupt file if necessary.

        Only genuine corruption leads to deletion. sqlite3.OperationalError
        covers transient or environmental conditions (database locked or
        busy, I/O errors, permissions); deleting the file in those cases
        would destroy valid checkpoints, so the file is left untouched and
        initialization is retried once instead.
        """
        import sqlite3 as _sqlite3

        try:
            self._init_db()
            return
        except _sqlite3.OperationalError as busy_err:
            logger.warning(
                f"Checkpoint DB unavailable — retrying without deleting {self._db_path}: {busy_err}"
            )
            try:
                self._init_db()
            except Exception as e:
                logger.error(f"Cannot initialize checkpoint DB: {e}")
            return
        except Exception:
            logger.warning(f"Checkpoint DB corrupt or unreadable — recreating: {self._db_path}")

        try:
            self._db_path.unlink(missing_ok=True)
        except Exception as del_err:
            logger.error(f"Could not delete corrupt checkpoint DB: {del_err}")
        try:
            self._init_db()
        except Exception as e:
            logger.error(f"Cannot initialize checkpoint DB after cleanup: {e}")

    def _db_connect(self) -> sqlite3.Connection:
        """SQLite connection with WAL mode and timeout for concurrent safety."""
        import sqlite3 as _sqlite3

        conn = _sqlite3.connect(str(self._db_path), timeout=5.0, check_same_thread=False)
        try:
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA synchronous=NORMAL")
        except Exception:
            # Do not leak the handle: an open connection can keep the file
            # locked while the caller tries to delete a corrupt database.
            conn.close()
            raise
        return conn

    def _init_db(self) -> None:
        """Create the checkpoints table and its session index if they are missing."""
        conn = self._db_connect()
        try:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS checkpoints (
                    checkpoint_id TEXT PRIMARY KEY,
                    session_id TEXT NOT NULL,
                    created_at REAL NOT NULL,
                    context_pct REAL,
                    trigger TEXT,
                    message_count INTEGER,
                    data_json TEXT NOT NULL
                )
            """)
            conn.execute("CREATE INDEX IF NOT EXISTS idx_ckpt_session ON checkpoints(session_id)")
            conn.commit()
        finally:
            conn.close()

    def create(
        self,
        session_id: str,
        messages: list[dict],  # FULL message history — not just recent
        graph: GraphMemory,
        context_pct: float,
        trigger: str = "auto_threshold",
        model: str = "",
    ) -> Checkpoint:
        """
        Create a checkpoint from the current session state.

        Raises:
            CheckpointPersistError: if the checkpoint could not be written
                to SQLite (propagated from _save).
        """
        checkpoint_id = f"ckpt_{uuid.uuid4().hex[:12]}"

        # Run extraction over the supplied messages before snapshotting.
        # NOTE(review): the original intent was "force full extraction from
        # ALL messages", yet incremental=True is passed — confirm against
        # GraphMemory.extract_from_messages that this is what is wanted.
        graph.extract_from_messages(messages, incremental=True)

        graph_snapshot = {
            "nodes": [
                {
                    "id": n.id,
                    "type": n.type.value,
                    "label": n.label,
                    "status": n.status.value,
                    "summary": n.summary,
                    "importance": n.importance,
                }
                for n in graph._nodes.values()
                if not n._evicted
            ],
            "edge_count": len(graph._edges),
        }

        # Compute diff from previous checkpoint
        prev = self._previous_snapshot(session_id)
        graph_diff = self._compute_diff(prev, graph_snapshot)

        # Get last user message as next_action hint
        next_action = self._last_user_text(messages)

        # Build tiered resume blocks
        resume_critical = self._build_critical(graph, next_action)
        resume_standard = self._build_standard(graph, next_action)
        resume_full = self._build_full(graph, messages, next_action)

        ckpt = Checkpoint(
            checkpoint_id=checkpoint_id,
            session_id=session_id,
            created_at=time.time(),
            context_pct=context_pct,
            trigger=trigger,
            message_count=len(messages),
            graph_snapshot=graph_snapshot,
            graph_diff=graph_diff,
            resume_critical=resume_critical,
            resume_standard=resume_standard,
            resume_full=resume_full,
            model=model,
            next_action=next_action,
        )

        self._save(ckpt)
        # Advance the diff baseline only once the checkpoint is durable;
        # otherwise a failed save would make the next diff relative to a
        # snapshot that was never persisted.
        self._prev_snapshots[session_id] = graph_snapshot
        logger.info(
            f"Checkpoint {checkpoint_id}: session={session_id} "
            f"msgs={len(messages)} nodes={len(graph._nodes)} "
            f"context={context_pct:.0%} resume_tokens={ckpt.resume_tokens}"
        )
        return ckpt

    def _previous_snapshot(self, session_id: str) -> dict:
        """
        Return the graph snapshot of the session's previous checkpoint.

        Uses the in-memory cache first and falls back to the latest
        checkpoint stored in SQLite, so the diff baseline is not lost when
        the process restarts. Returns an empty snapshot if there is none.
        """
        cached = self._prev_snapshots.get(session_id)
        if cached is not None:
            return cached
        latest = self.get_latest(session_id)
        if latest is not None and latest.graph_snapshot:
            return latest.graph_snapshot
        return {"nodes": []}

    @staticmethod
    def _last_user_text(messages: list[dict], limit: int = 200) -> str:
        """
        Return the text of the most recent user message, cut to ``limit`` chars.

        ``content`` is normally a string, but it may also be missing/None or a
        list of parts (strings or dicts carrying a "text" entry); those cases
        are flattened to plain text so the result is always a str. Returns ""
        when there is no user message.
        """
        for msg in reversed(messages):
            if msg.get("role") != "user":
                continue
            content = msg.get("content")
            if content is None:
                text = ""
            elif isinstance(content, str):
                text = content
            elif isinstance(content, list):
                pieces = []
                for part in content:
                    if isinstance(part, str):
                        pieces.append(part)
                    elif isinstance(part, dict) and isinstance(part.get("text"), str):
                        pieces.append(part["text"])
                text = " ".join(pieces)
            else:
                text = str(content)
            return text[:limit]
        return ""

    def _build_critical(self, graph: GraphMemory, next_action: str) -> str:
        """~100 tokens. Only open blockers + critical decisions."""
        from tokenmizer.graph_memory.graph import NodeStatus, NodeType
        lines = []

        open_errors = [n for n in graph._nodes.values()
                       if n.type == NodeType.ERROR and n.status == NodeStatus.FAILED]
        if open_errors:
            lines.append("OPEN BUGS: " + " | ".join(e.label for e in open_errors[:3]))

        high_priority_tasks = [n for n in graph._nodes.values()
                               if n.type == NodeType.TASK
                               and n.status == NodeStatus.IN_PROGRESS
                               and n.importance >= 0.8]
        if high_priority_tasks:
            lines.append("CRITICAL WIP: " + " | ".join(t.label for t in high_priority_tasks[:3]))

        # Most important decisions first.
        decisions = sorted(
            [n for n in graph._nodes.values() if n.type == NodeType.DECISION],
            key=lambda x: x.importance, reverse=True
        )
        if decisions:
            lines.append("KEY DECISIONS: " + " | ".join(d.label for d in decisions[:3]))

        if next_action:
            lines.append(f"LAST REQUEST: {next_action[:100]}")

        return "\n".join(lines)

    def _build_standard(self, graph: GraphMemory, next_action: str) -> str:
        """~300 tokens. Normal resume — goals, tasks, decisions, files."""
        block = graph.to_context_block(token_budget=300)
        if next_action:
            block += f"\nContinue from: {next_action[:150]}"
        return block

    def _build_full(self, graph: GraphMemory, messages: list[dict], next_action: str) -> str:
        """
        ~600 tokens. Deep resume with environment, schemas, dependencies.

        ``messages`` is accepted for interface compatibility but not used.
        """
        from tokenmizer.graph_memory.graph import NodeStatus, NodeType
        # Start from the standard block without its trailing "Continue from"
        # line; that line is appended once at the very end instead.
        parts = [self._build_standard(graph, "")]

        env_nodes = [n for n in graph._nodes.values() if n.type == NodeType.ENVIRONMENT]
        if env_nodes:
            parts.append("Environment: " + ", ".join(e.label for e in env_nodes[:8]))

        dep_nodes = [n for n in graph._nodes.values() if n.type == NodeType.DEPENDENCY]
        if dep_nodes:
            parts.append("Dependencies: " + ", ".join(d.label for d in dep_nodes[:10]))

        schema_nodes = [n for n in graph._nodes.values() if n.type == NodeType.SCHEMA]
        if schema_nodes:
            parts.append("Schemas: " + " | ".join(s.label for s in schema_nodes[:4]))

        endpoint_nodes = [n for n in graph._nodes.values() if n.type == NodeType.ENDPOINT]
        if endpoint_nodes:
            parts.append("Endpoints: " + ", ".join(e.label for e in endpoint_nodes[:8]))

        done_tasks = [n for n in graph._nodes.values()
                      if n.type == NodeType.TASK and n.status == NodeStatus.COMPLETED]
        done_tasks.sort(key=lambda x: x.updated_at, reverse=True)
        if done_tasks:
            parts.append("Recently completed: " + " | ".join(t.label for t in done_tasks[:6]))

        if next_action:
            parts.append(f"Continue from: {next_action[:150]}")

        return "\n".join(parts)

    def _compute_diff(self, prev: dict, current: dict) -> dict:
        """
        Diff two graph snapshots by node id.

        Returns a dict with "added" and "removed" (lists of node dicts) and
        "status_changed" (list of {"id", "from", "to"} entries).
        """
        prev_nodes = {n["id"]: n for n in prev.get("nodes", [])}
        curr_nodes = {n["id"]: n for n in current.get("nodes", [])}
        return {
            "added": [n for nid, n in curr_nodes.items() if nid not in prev_nodes],
            "removed": [n for nid, n in prev_nodes.items() if nid not in curr_nodes],
            "status_changed": [
                {
                    "id": nid,
                    "from": prev_nodes[nid].get("status"),
                    "to": node.get("status"),
                }
                for nid, node in curr_nodes.items()
                if nid in prev_nodes
                and node.get("status") != prev_nodes[nid].get("status")
            ],
        }

    def _save(self, ckpt: Checkpoint) -> None:
        """
        Persist a checkpoint to SQLite.

        Any failure is logged and re-raised as CheckpointPersistError so the
        caller never reports a checkpoint as created when nothing was
        written to disk.

        Raises:
            CheckpointPersistError: if connecting, inserting or committing fails.
        """
        try:
            conn = self._db_connect()
            try:
                conn.execute(
                    """INSERT OR REPLACE INTO checkpoints
                    (checkpoint_id, session_id, created_at, context_pct, trigger, message_count, data_json)
                    VALUES (?, ?, ?, ?, ?, ?, ?)""",
                    (
                        ckpt.checkpoint_id,
                        ckpt.session_id,
                        ckpt.created_at,
                        ckpt.context_pct,
                        ckpt.trigger,
                        ckpt.message_count,
                        json.dumps({
                            "graph_snapshot": ckpt.graph_snapshot,
                            "graph_diff": ckpt.graph_diff,
                            "resume_critical": ckpt.resume_critical,
                            "resume_standard": ckpt.resume_standard,
                            "resume_full": ckpt.resume_full,
                            "model": ckpt.model,
                            "next_action": ckpt.next_action,
                        }),
                    ),
                )
                conn.commit()
            finally:
                conn.close()
        except Exception as e:
            logger.error(f"Checkpoint save failed for {ckpt.checkpoint_id}: {e}")
            raise CheckpointPersistError(
                f"Failed to persist checkpoint {ckpt.checkpoint_id} for "
                f"session {ckpt.session_id}: {e}"
            ) from e

    def get_latest(self, session_id: str) -> Optional[Checkpoint]:
        """
        Return the newest checkpoint for ``session_id``.

        Returns None when the session has no checkpoint, and also when the
        read fails (the failure is logged at error level).
        """
        try:
            conn = self._db_connect()
            try:
                row = conn.execute(
                    """SELECT checkpoint_id, session_id, created_at, context_pct,
                    trigger, message_count, data_json
                    FROM checkpoints WHERE session_id=?
                    ORDER BY created_at DESC LIMIT 1""",
                    (session_id,),
                ).fetchone()
            finally:
                conn.close()
            if not row:
                return None
            return self._row_to_checkpoint(row)
        except Exception as e:
            logger.error(f"Checkpoint load failed: {e}")
            return None

    def list_checkpoints(self, session_id: str) -> list[dict]:
        """
        Returns checkpoint metadata for a session, newest first.

        A read failure still returns [] (the return type is part of the API
        contract), but it is logged at error level so a broken database can
        be told apart from a session that simply has no checkpoints yet.
        """
        try:
            conn = self._db_connect()
            try:
                rows = conn.execute(
                    """SELECT checkpoint_id, created_at, context_pct, trigger, message_count
                    FROM checkpoints WHERE session_id=? ORDER BY created_at DESC""",
                    (session_id,),
                ).fetchall()
            finally:
                conn.close()
            return [
                {"checkpoint_id": r[0], "created_at": r[1], "context_pct": r[2],
                 "trigger": r[3], "message_count": r[4]}
                for r in rows
            ]
        except Exception as e:
            logger.error(f"Checkpoint list query failed for session {session_id}: {e}")
            return []

    def _row_to_checkpoint(self, row) -> Checkpoint:
        """
        Build a Checkpoint from a row selected in the column order
        (checkpoint_id, session_id, created_at, context_pct, trigger,
        message_count, data_json). NULL columns and missing JSON keys fall
        back to neutral defaults.
        """
        data = json.loads(row[6])
        return Checkpoint(
            checkpoint_id=row[0],
            session_id=row[1],
            created_at=row[2],
            context_pct=row[3] or 0.0,
            trigger=row[4] or "unknown",
            message_count=row[5] or 0,
            graph_snapshot=data.get("graph_snapshot", {}),
            graph_diff=data.get("graph_diff", {}),
            resume_critical=data.get("resume_critical", ""),
            resume_standard=data.get("resume_standard", ""),
            resume_full=data.get("resume_full", ""),
            model=data.get("model", ""),
            next_action=data.get("next_action", ""),
        )
|
tokenmizer/cli.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""TokenMizer CLI"""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.panel import Panel
|
|
9
|
+
|
|
10
|
+
# Shared Rich console used by the commands for styled terminal output.
console = Console()

# Root Typer application; functions registered with @app.command() become
# its subcommands.
app = typer.Typer(
    add_completion=False,
    help="🧠 TokenMizer — Never lose your AI context again.",
    name="tokenmizer",
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@app.command()
def serve(
    host: str = typer.Option("0.0.0.0", help="Bind host"),
    port: int = typer.Option(8000, help="Bind port"),
    config: Optional[str] = typer.Option(None, help="Path to tokenmizer.yaml"),
    reload: bool = typer.Option(False, help="Auto-reload on code changes (dev only)"),
    workers: int = typer.Option(1, help="Number of worker processes"),
):
    """Start TokenMizer proxy + dashboard."""
    import os

    import uvicorn

    # Hand the config path to the server through the environment.
    if config:
        os.environ["TOKENMIZER_CONFIG"] = config

    base_url = f"http://{host}:{port}"
    banner = "\n".join([
        "[bold green]🧠 TokenMizer[/bold green]",
        f"[dim]Proxy: {base_url}/v1/chat/completions[/dim]",
        f"[dim]Dashboard: {base_url}[/dim]",
        f"[dim]API Docs: {base_url}/docs[/dim]",
        f"[dim]Health: {base_url}/health[/dim]",
    ])
    console.print(Panel.fit(banner, border_style="green"))

    # With auto-reload enabled a single worker is always used.
    worker_count = 1 if reload else workers
    uvicorn.run(
        "tokenmizer.api.app:app",
        host=host,
        port=port,
        reload=reload,
        workers=worker_count,
        log_level="info",
    )
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@app.command()
def stats(
    session_id: Optional[str] = typer.Argument(None, help="Session ID (optional)"),
    server: str = typer.Option("http://localhost:8000", help="TokenMizer server URL"),
    api_key: Optional[str] = typer.Option(None, envvar="TOKENMIZER_API_KEY"),
):
    """Print session/global analytics."""
    import httpx

    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # Let httpx build the query string so a session ID containing reserved
    # characters (&, #, spaces, ...) is percent-encoded instead of corrupting
    # the URL.
    params = {"session_id": session_id} if session_id else None

    try:
        response = httpx.get(f"{server}/api/stats", params=params, headers=headers, timeout=5)
    except Exception as e:
        console.print(f"[red]Cannot reach server: {e}[/red]")
        raise typer.Exit(1)

    # A reachable server that answers with an error is reported as such,
    # rather than as a connectivity problem or as all-zero statistics.
    if response.status_code != 200:
        console.print(f"[red]Error: {response.text}[/red]")
        raise typer.Exit(1)

    try:
        data = response.json()
    except ValueError as e:
        console.print(f"[red]Invalid response from server: {e}[/red]")
        raise typer.Exit(1)

    # Tolerate a payload without a usable "daily" object: show zeros.
    d = (data.get("daily") if isinstance(data, dict) else None) or {}
    console.print(Panel.fit(
        f"[bold]Daily Stats[/bold]\n"
        f"[green]Requests: {d.get('requests', 0):,}[/green]\n"
        f"[green]Tokens saved: {d.get('tokens_saved', 0):,} ({d.get('savings_pct', 0):.1f}%)[/green]\n"
        f"[yellow]Cost saved: ${d.get('cost_saved_usd', 0):.4f}[/yellow]",
        border_style="green",
    ))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@app.command()
def checkpoint(
    session_id: str = typer.Argument(..., help="Session ID to checkpoint"),
    server: str = typer.Option("http://localhost:8000"),
    api_key: Optional[str] = typer.Option(None, envvar="TOKENMIZER_API_KEY"),
    level: str = typer.Option("standard", help="Resume level: critical | standard | full"),
):
    """Create a manual checkpoint and show resume context."""
    import httpx

    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        r = httpx.post(
            f"{server}/api/checkpoint",
            # Passed via params so the session ID is percent-encoded.
            params={"session_id": session_id},
            headers=headers,
            timeout=30,
        )
    except Exception as e:
        # Same handling as the stats command: no traceback when the server
        # is down or the URL is wrong.
        console.print(f"[red]Cannot reach server: {e}[/red]")
        raise typer.Exit(1)

    if r.status_code != 200:
        console.print(f"[red]Error: {r.text}[/red]")
        raise typer.Exit(1)

    data = r.json()

    # Honor --level: show the requested resume block when the response
    # carries it, otherwise fall back to the standard block and label the
    # output with the level that is actually shown.
    # NOTE(review): assumes the endpoint may return resume_critical /
    # resume_full next to resume_standard — confirm against the API handler.
    resume_key = f"resume_{level}"
    if resume_key in data:
        shown_level, resume_text = level, data[resume_key]
    else:
        shown_level, resume_text = "standard", data.get("resume_standard", "")

    console.print(Panel.fit(
        f"[green]✅ Checkpoint created[/green]\n"
        f"[dim]ID: {data['checkpoint_id']}[/dim]\n"
        f"[dim]Nodes: {data.get('node_count', 0)}[/dim]\n"
        f"[dim]Resume tokens: {data['resume_tokens']}[/dim]\n\n"
        f"[bold]Resume context ({shown_level}):[/bold]\n"
        f"{resume_text}",
        border_style="green",
    ))
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@app.command()
def resume(
    session_id: str = typer.Argument(..., help="Session ID to resume"),
    server: str = typer.Option("http://localhost:8000"),
    level: str = typer.Option("standard", help="critical | standard | full"),
    api_key: Optional[str] = typer.Option(None, envvar="TOKENMIZER_API_KEY"),
):
    """Get the resume context for a session checkpoint."""
    from urllib.parse import quote

    import httpx

    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        r = httpx.get(
            # The session ID is a path segment: escape everything, including
            # "/", so it cannot change which route is requested.
            f"{server}/api/resume/{quote(session_id, safe='')}",
            params={"level": level},
            headers=headers,
            timeout=10,
        )
    except Exception as e:
        console.print(f"[red]Cannot reach server: {e}[/red]")
        raise typer.Exit(1)

    if r.status_code == 404:
        console.print(f"[yellow]No checkpoint found for session: {session_id}[/yellow]")
        raise typer.Exit(1)
    # Any other failure (401, 500, ...) is reported as an error instead of
    # crashing below on a response body that lacks the expected keys.
    if r.status_code != 200:
        console.print(f"[red]Error: {r.text}[/red]")
        raise typer.Exit(1)

    data = r.json()
    console.print(Panel(
        data["resume_context"],
        title=f"[green]Resume — {session_id[:16]}... ({data['token_count']} tokens)[/green]",
        border_style="green",
    ))
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
|
|
File without changes
|