tokenmizer 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. tokenmizer/__init__.py +21 -0
  2. tokenmizer/agents/__init__.py +0 -0
  3. tokenmizer/analytics/__init__.py +0 -0
  4. tokenmizer/analytics/engine.py +188 -0
  5. tokenmizer/api/__init__.py +0 -0
  6. tokenmizer/api/app.py +958 -0
  7. tokenmizer/api/rate_limiter.py +110 -0
  8. tokenmizer/checkpoints/__init__.py +0 -0
  9. tokenmizer/checkpoints/manager.py +383 -0
  10. tokenmizer/cli.py +153 -0
  11. tokenmizer/compression/__init__.py +0 -0
  12. tokenmizer/compression/engine.py +669 -0
  13. tokenmizer/compression/output_trimmer.py +95 -0
  14. tokenmizer/compression/window.py +104 -0
  15. tokenmizer/config/__init__.py +0 -0
  16. tokenmizer/config/settings.py +170 -0
  17. tokenmizer/core/__init__.py +0 -0
  18. tokenmizer/core/dto.py +196 -0
  19. tokenmizer/core/errors.py +35 -0
  20. tokenmizer/core/tokenizer.py +96 -0
  21. tokenmizer/dashboard/__init__.py +0 -0
  22. tokenmizer/dashboard/page.py +267 -0
  23. tokenmizer/filters/__init__.py +0 -0
  24. tokenmizer/filters/file_intelligence.py +960 -0
  25. tokenmizer/graph_memory/__init__.py +0 -0
  26. tokenmizer/graph_memory/decision_tracker.py +225 -0
  27. tokenmizer/graph_memory/graph.py +1287 -0
  28. tokenmizer/graph_memory/helpers.py +121 -0
  29. tokenmizer/graph_memory/hybrid_extractor.py +703 -0
  30. tokenmizer/graph_memory/types.py +134 -0
  31. tokenmizer/graph_memory/validator.py +304 -0
  32. tokenmizer/graph_memory/visualization.py +228 -0
  33. tokenmizer/mcp/__init__.py +0 -0
  34. tokenmizer/mcp/server.py +368 -0
  35. tokenmizer/providers/__init__.py +0 -0
  36. tokenmizer/providers/providers.py +456 -0
  37. tokenmizer/security/__init__.py +0 -0
  38. tokenmizer/security/auth.py +95 -0
  39. tokenmizer/security/middleware.py +138 -0
  40. tokenmizer/security/redaction.py +126 -0
  41. tokenmizer/semantic_cache/__init__.py +0 -0
  42. tokenmizer/semantic_cache/cache.py +383 -0
  43. tokenmizer/state/__init__.py +0 -0
  44. tokenmizer/state/backend.py +137 -0
  45. tokenmizer/storage/__init__.py +56 -0
  46. tokenmizer-0.2.4.dist-info/METADATA +529 -0
  47. tokenmizer-0.2.4.dist-info/RECORD +50 -0
  48. tokenmizer-0.2.4.dist-info/WHEEL +4 -0
  49. tokenmizer-0.2.4.dist-info/entry_points.txt +2 -0
  50. tokenmizer-0.2.4.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,110 @@
1
+ """
2
+ Simple token-bucket rate limiter for the proxy endpoint.
3
+ No external deps — pure stdlib.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import asyncio
8
+ import logging
9
+ import time
10
+ from collections import defaultdict
11
+ from dataclasses import dataclass, field
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ @dataclass
17
+ class _Bucket:
18
+ tokens: float
19
+ last_refill: float = field(default_factory=time.monotonic)
20
+
21
+
22
+ class RateLimiter:
23
+ """
24
+ Per-client token-bucket rate limiter.
25
+ Default: 60 requests/minute per API key (or IP if no key).
26
+
27
+ Usage:
28
+ limiter = RateLimiter(rate=60, per_seconds=60, burst=10)
29
+ allowed, retry_after = limiter.check("client-id")
30
+ """
31
+
32
+ def __init__(
33
+ self,
34
+ rate: int = 60,
35
+ per_seconds: int = 60,
36
+ burst: int = 10,
37
+ max_clients: int = 50_000,
38
+ ):
39
+ self.rate = rate # tokens per window
40
+ self.per_seconds = per_seconds # window length
41
+ self.burst = burst # max burst above window rate
42
+ self.max_clients = max_clients # hard cap — prevents unbounded growth
43
+ self.capacity = rate + burst
44
+ self.refill_rate = rate / per_seconds # tokens per second
45
+ self._buckets: dict[str, _Bucket] = defaultdict(
46
+ lambda: _Bucket(tokens=float(self.capacity))
47
+ )
48
+ self._lock = asyncio.Lock()
49
+ # Cleanup: evict stale buckets every 5 minutes
50
+ self._last_cleanup = time.monotonic()
51
+ self._cleanup_interval = 300
52
+
53
+ async def check(self, client_id: str) -> tuple[bool, float]:
54
+ """
55
+ Returns (allowed, retry_after_seconds).
56
+ retry_after is 0.0 if allowed.
57
+ """
58
+ async with self._lock:
59
+ now = time.monotonic()
60
+ bucket = self._buckets[client_id]
61
+
62
+ # Refill
63
+ elapsed = now - bucket.last_refill
64
+ bucket.tokens = min(
65
+ self.capacity,
66
+ bucket.tokens + elapsed * self.refill_rate
67
+ )
68
+ bucket.last_refill = now
69
+
70
+ # Evict stale buckets periodically
71
+ if now - self._last_cleanup > self._cleanup_interval:
72
+ self._evict_stale(now)
73
+
74
+ # Hard cap: if still over limit after cleanup, evict oldest entries
75
+ if len(self._buckets) >= self.max_clients:
76
+ # Remove ~10% oldest to avoid thrashing
77
+ evict_count = max(1, self.max_clients // 10)
78
+ oldest = sorted(self._buckets.items(), key=lambda x: x[1].last_refill)[:evict_count]
79
+ for k, _ in oldest:
80
+ del self._buckets[k]
81
+ logger.warning(f"Rate limiter hard cap hit — evicted {evict_count} oldest buckets")
82
+
83
+ if bucket.tokens >= 1.0:
84
+ bucket.tokens -= 1.0
85
+ return True, 0.0
86
+ else:
87
+ # How long until 1 token refills
88
+ retry_after = (1.0 - bucket.tokens) / self.refill_rate
89
+ logger.warning(f"Rate limit hit for client '{client_id}'")
90
+ return False, retry_after
91
+
92
+ def _evict_stale(self, now: float, stale_after: float = 600.0) -> None:
93
+ """Remove buckets inactive for >10 minutes to prevent memory leak."""
94
+ stale = [k for k, b in self._buckets.items()
95
+ if (now - b.last_refill) > stale_after]
96
+ for k in stale:
97
+ del self._buckets[k]
98
+ if stale:
99
+ logger.debug(f"Rate limiter: evicted {len(stale)} stale buckets")
100
+ self._last_cleanup = now
101
+
102
+
103
# Module-level singleton, created lazily by get_rate_limiter().
_limiter: RateLimiter | None = None


def get_rate_limiter(rate: int = 60, per_seconds: int = 60, burst: int = 10) -> RateLimiter:
    """Return the shared RateLimiter, building it on the first call.

    The arguments are only used by the call that creates the instance;
    once a limiter exists it is returned unchanged and they are ignored.
    """
    global _limiter
    if _limiter is not None:
        return _limiter
    _limiter = RateLimiter(rate=rate, per_seconds=per_seconds, burst=burst)
    return _limiter
File without changes
@@ -0,0 +1,383 @@
1
+ """
2
+ Checkpoint Manager — creates and restores session checkpoints.
3
+
4
+ Key fixes over V3:
5
+ - Extracts from FULL message history, not just last 10
6
+ - Tiered resume blocks (critical / standard / full)
7
+ - Graph diff between checkpoints
8
+ - Accurate token counting via tiktoken
9
+ - SQLite persistence
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ import time
16
+ import uuid
17
+ from dataclasses import dataclass
18
+ from pathlib import Path
19
+ from typing import TYPE_CHECKING, Optional
20
+
21
+ if TYPE_CHECKING:
22
+ import sqlite3
23
+
24
+ from tokenmizer.core.errors import CheckpointPersistError
25
+ from tokenmizer.core.tokenizer import count_tokens
26
+ from tokenmizer.graph_memory.graph import GraphMemory
27
+
28
logger = logging.getLogger(__name__)


@dataclass
class Checkpoint:
    """One persisted session checkpoint: graph snapshot plus tiered resume text."""

    checkpoint_id: str
    session_id: str
    created_at: float       # time.time() when the checkpoint was built
    context_pct: float
    trigger: str            # "auto_threshold" | "manual" | "provider_switch"
    message_count: int
    graph_snapshot: dict    # full graph state at checkpoint time
    graph_diff: dict        # diff from previous checkpoint
    resume_critical: str    # ~100 tokens — must-know facts
    resume_standard: str    # ~300 tokens — normal resume
    resume_full: str        # ~600 tokens — deep resume
    model: str = ""
    next_action: str = ""

    @property
    def resume_tokens(self) -> int:
        """Token count of the standard resume block."""
        return count_tokens(self.resume_standard)


class CheckpointManager:
    """
    Manages checkpoints for all sessions.
    Stored in SQLite — survives restarts.
    """

    def __init__(self, storage_dir: str = "./checkpoints"):
        """Create ``storage_dir`` if needed and open/initialize ``checkpoints.db`` in it."""
        self._dir = Path(storage_dir)
        self._dir.mkdir(parents=True, exist_ok=True)
        self._db_path = self._dir / "checkpoints.db"
        self._safe_init_db()
        self._prev_snapshots: dict[str, dict] = {}  # session_id → last snapshot

    def _safe_init_db(self) -> None:
        """
        Initialize the DB, recreating the file only when it looks corrupt.

        A ``sqlite3.OperationalError`` (locked/busy database, I/O problem,
        file cannot be opened) says nothing about the file's contents, so the
        existing checkpoints are left alone in that case — deleting them on a
        transient lock would silently destroy every saved checkpoint. Any
        other failure is treated as corruption: the file and its WAL sidecars
        are removed and initialization is retried once. Failures are logged,
        never raised.
        """
        import sqlite3 as _sqlite3

        try:
            self._init_db()
            return
        except _sqlite3.OperationalError as e:
            logger.error(
                f"Cannot initialize checkpoint DB (keeping existing file): {e}"
            )
            return
        except Exception:
            logger.warning(f"Checkpoint DB corrupt or unreadable — recreating: {self._db_path}")

        # Remove the main file plus the -wal/-shm sidecars so a stale WAL is
        # never paired with the freshly created database.
        doomed = [self._db_path] + [
            self._db_path.with_name(self._db_path.name + suffix)
            for suffix in ("-wal", "-shm")
        ]
        for path in doomed:
            try:
                path.unlink(missing_ok=True)
            except Exception as del_err:
                logger.error(f"Could not delete corrupt checkpoint DB: {del_err}")

        try:
            self._init_db()
        except Exception as e:
            logger.error(f"Cannot initialize checkpoint DB after cleanup: {e}")

    def _db_connect(self) -> sqlite3.Connection:
        """SQLite connection with WAL mode and timeout for concurrent safety."""
        import sqlite3 as _sqlite3

        conn = _sqlite3.connect(str(self._db_path), timeout=5.0, check_same_thread=False)
        try:
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA synchronous=NORMAL")
        except Exception:
            # Don't leak the handle when the file is unusable (e.g. not a DB).
            conn.close()
            raise
        return conn

    def _init_db(self) -> None:
        """Create the ``checkpoints`` table and its session index if missing."""
        conn = self._db_connect()
        try:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS checkpoints (
                    checkpoint_id TEXT PRIMARY KEY,
                    session_id TEXT NOT NULL,
                    created_at REAL NOT NULL,
                    context_pct REAL,
                    trigger TEXT,
                    message_count INTEGER,
                    data_json TEXT NOT NULL
                )
            """)
            conn.execute("CREATE INDEX IF NOT EXISTS idx_ckpt_session ON checkpoints(session_id)")
            conn.commit()
        finally:
            conn.close()

    @staticmethod
    def _content_to_text(content) -> str:
        """Flatten a chat message ``content`` value into plain text.

        Handles ``None``, a plain string, and a list of parts where each part
        is either a string or a dict carrying a string under ``"text"``.
        Anything else is passed through ``str()``.
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            return " ".join(pieces)
        return str(content)

    def create(
        self,
        session_id: str,
        messages: list[dict],  # FULL message history — not just recent
        graph: GraphMemory,
        context_pct: float,
        trigger: str = "auto_threshold",
        model: str = "",
    ) -> Checkpoint:
        """
        Create and persist a checkpoint from the current session state.

        Raises:
            CheckpointPersistError: If the checkpoint cannot be written.
        """
        checkpoint_id = f"ckpt_{uuid.uuid4().hex[:12]}"

        # Bring the graph up to date with the message history before snapshotting.
        # NOTE(review): the original comment promised a "full extraction from
        # ALL messages" but the call passes incremental=True — confirm which
        # one GraphMemory.extract_from_messages is meant to do here.
        graph.extract_from_messages(messages, incremental=True)

        graph_snapshot = {
            "nodes": [
                {
                    "id": n.id,
                    "type": n.type.value,
                    "label": n.label,
                    "status": n.status.value,
                    "summary": n.summary,
                    "importance": n.importance,
                }
                for n in graph._nodes.values()
                if not n._evicted
            ],
            "edge_count": len(graph._edges),
        }

        # Compute diff from previous checkpoint. The in-memory cache is empty
        # after a restart, so fall back to the last persisted snapshot instead
        # of reporting every node as newly added.
        prev = self._prev_snapshots.get(session_id)
        if prev is None:
            latest = self.get_latest(session_id)
            prev = latest.graph_snapshot if latest is not None else {"nodes": []}
        graph_diff = self._compute_diff(prev, graph_snapshot)

        # Get last user message as next_action hint
        next_action = ""
        for msg in reversed(messages):
            if msg.get("role") == "user":
                next_action = self._content_to_text(msg.get("content", ""))[:200]
                break

        # Build tiered resume blocks
        resume_critical = self._build_critical(graph, next_action)
        resume_standard = self._build_standard(graph, next_action)
        resume_full = self._build_full(graph, messages, next_action)

        ckpt = Checkpoint(
            checkpoint_id=checkpoint_id,
            session_id=session_id,
            created_at=time.time(),
            context_pct=context_pct,
            trigger=trigger,
            message_count=len(messages),
            graph_snapshot=graph_snapshot,
            graph_diff=graph_diff,
            resume_critical=resume_critical,
            resume_standard=resume_standard,
            resume_full=resume_full,
            model=model,
            next_action=next_action,
        )

        self._save(ckpt)
        # Only advance the diff baseline once the checkpoint is actually on
        # disk; otherwise a failed save would skew the next checkpoint's diff.
        self._prev_snapshots[session_id] = graph_snapshot

        logger.info(
            f"Checkpoint {checkpoint_id}: session={session_id} "
            f"msgs={len(messages)} nodes={len(graph._nodes)} "
            f"context={context_pct:.0%} resume_tokens={ckpt.resume_tokens}"
        )
        return ckpt

    def _build_critical(self, graph: GraphMemory, next_action: str) -> str:
        """~100 tokens. Only open blockers + critical decisions."""
        from tokenmizer.graph_memory.graph import NodeStatus, NodeType

        nodes = list(graph._nodes.values())
        lines = []

        open_errors = [n for n in nodes
                       if n.type == NodeType.ERROR and n.status == NodeStatus.FAILED]
        if open_errors:
            lines.append("OPEN BUGS: " + " | ".join(e.label for e in open_errors[:3]))

        high_priority_tasks = [n for n in nodes
                               if n.type == NodeType.TASK
                               and n.status == NodeStatus.IN_PROGRESS
                               and n.importance >= 0.8]
        if high_priority_tasks:
            lines.append("CRITICAL WIP: " + " | ".join(t.label for t in high_priority_tasks[:3]))

        # Most important decisions first.
        decisions = sorted(
            (n for n in nodes if n.type == NodeType.DECISION),
            key=lambda x: x.importance,
            reverse=True,
        )
        if decisions:
            lines.append("KEY DECISIONS: " + " | ".join(d.label for d in decisions[:3]))

        if next_action:
            lines.append(f"LAST REQUEST: {next_action[:100]}")

        return "\n".join(lines)

    def _build_standard(self, graph: GraphMemory, next_action: str) -> str:
        """~300 tokens. Normal resume — goals, tasks, decisions, files."""
        block = graph.to_context_block(token_budget=300)
        if next_action:
            block += f"\nContinue from: {next_action[:150]}"
        return block

    def _build_full(self, graph: GraphMemory, messages: list[dict], next_action: str) -> str:
        """~600 tokens. Deep resume with environment, schemas, dependencies.

        ``messages`` is accepted for signature compatibility; it is not read.
        """
        from tokenmizer.graph_memory.graph import NodeStatus, NodeType

        nodes = list(graph._nodes.values())
        # Start from the standard block without its "Continue from" line; that
        # line is appended once at the very end instead.
        parts = [self._build_standard(graph, "")]

        # (node type, heading, separator, max labels) — emitted in this order.
        sections = (
            (NodeType.ENVIRONMENT, "Environment: ", ", ", 8),
            (NodeType.DEPENDENCY, "Dependencies: ", ", ", 10),
            (NodeType.SCHEMA, "Schemas: ", " | ", 4),
            (NodeType.ENDPOINT, "Endpoints: ", ", ", 8),
        )
        for node_type, heading, sep, limit in sections:
            matching = [n for n in nodes if n.type == node_type]
            if matching:
                parts.append(heading + sep.join(n.label for n in matching[:limit]))

        done_tasks = [n for n in nodes
                      if n.type == NodeType.TASK and n.status == NodeStatus.COMPLETED]
        done_tasks.sort(key=lambda x: x.updated_at, reverse=True)
        if done_tasks:
            parts.append("Recently completed: " + " | ".join(t.label for t in done_tasks[:6]))

        if next_action:
            parts.append(f"Continue from: {next_action[:150]}")

        return "\n".join(parts)

    def _compute_diff(self, prev: dict, current: dict) -> dict:
        """Diff two graph snapshots by node id.

        Returns a dict with ``added`` and ``removed`` (lists of node dicts)
        and ``status_changed`` (list of ``{"id", "from", "to"}`` dicts for
        nodes present in both snapshots whose status differs).
        """
        prev_nodes = {n["id"]: n for n in prev.get("nodes", [])}
        curr_nodes = {n["id"]: n for n in current.get("nodes", [])}
        return {
            "added": [n for nid, n in curr_nodes.items() if nid not in prev_nodes],
            "removed": [n for nid, n in prev_nodes.items() if nid not in curr_nodes],
            "status_changed": [
                {"id": nid, "from": prev_nodes[nid]["status"], "to": curr_nodes[nid]["status"]}
                for nid in curr_nodes
                if nid in prev_nodes
                and curr_nodes[nid]["status"] != prev_nodes[nid]["status"]
            ],
        }

    def _save(self, ckpt: Checkpoint) -> None:
        """
        Persist a checkpoint to SQLite.

        Any failure is logged and re-raised as CheckpointPersistError so the
        caller can tell that nothing was written, rather than reporting a
        checkpoint that does not exist on disk.

        Raises:
            CheckpointPersistError: If the row cannot be written or committed.
        """
        try:
            conn = self._db_connect()
            try:
                conn.execute(
                    """INSERT OR REPLACE INTO checkpoints
                    (checkpoint_id, session_id, created_at, context_pct, trigger, message_count, data_json)
                    VALUES (?, ?, ?, ?, ?, ?, ?)""",
                    (
                        ckpt.checkpoint_id,
                        ckpt.session_id,
                        ckpt.created_at,
                        ckpt.context_pct,
                        ckpt.trigger,
                        ckpt.message_count,
                        json.dumps({
                            "graph_snapshot": ckpt.graph_snapshot,
                            "graph_diff": ckpt.graph_diff,
                            "resume_critical": ckpt.resume_critical,
                            "resume_standard": ckpt.resume_standard,
                            "resume_full": ckpt.resume_full,
                            "model": ckpt.model,
                            "next_action": ckpt.next_action,
                        }),
                    ),
                )
                conn.commit()
            finally:
                conn.close()
        except Exception as e:
            logger.error(f"Checkpoint save failed for {ckpt.checkpoint_id}: {e}")
            raise CheckpointPersistError(
                f"Failed to persist checkpoint {ckpt.checkpoint_id} for "
                f"session {ckpt.session_id}: {e}"
            ) from e

    def get_latest(self, session_id: str) -> Optional[Checkpoint]:
        """Return the newest checkpoint for ``session_id``.

        Returns ``None`` when the session has no checkpoint or when loading
        fails; a load failure is logged at error level.
        """
        try:
            conn = self._db_connect()
            try:
                row = conn.execute(
                    """SELECT checkpoint_id, session_id, created_at, context_pct,
                    trigger, message_count, data_json
                    FROM checkpoints WHERE session_id=?
                    ORDER BY created_at DESC LIMIT 1""",
                    (session_id,),
                ).fetchone()
            finally:
                conn.close()
            if not row:
                return None
            return self._row_to_checkpoint(row)
        except Exception as e:
            logger.error(f"Checkpoint load failed: {e}")
            return None

    def list_checkpoints(self, session_id: str) -> list[dict]:
        """
        Returns checkpoint metadata for a session, newest first.

        A failed query also returns ``[]`` (the return type is kept stable
        for callers), but the failure is logged at error level so it can be
        told apart from a session that simply has no checkpoints yet.
        """
        try:
            conn = self._db_connect()
            try:
                rows = conn.execute(
                    """SELECT checkpoint_id, created_at, context_pct, trigger, message_count
                    FROM checkpoints WHERE session_id=? ORDER BY created_at DESC""",
                    (session_id,),
                ).fetchall()
            finally:
                conn.close()
            return [
                {"checkpoint_id": r[0], "created_at": r[1], "context_pct": r[2],
                 "trigger": r[3], "message_count": r[4]}
                for r in rows
            ]
        except Exception as e:
            logger.error(f"Checkpoint list query failed for session {session_id}: {e}")
            return []

    def _row_to_checkpoint(self, row) -> Checkpoint:
        """Rebuild a Checkpoint from a full ``checkpoints`` row.

        ``row`` is expected in the column order used by get_latest(); NULL
        metadata columns and missing JSON keys fall back to neutral defaults.
        """
        data = json.loads(row[6])
        return Checkpoint(
            checkpoint_id=row[0],
            session_id=row[1],
            created_at=row[2],
            context_pct=row[3] or 0.0,
            trigger=row[4] or "unknown",
            message_count=row[5] or 0,
            graph_snapshot=data.get("graph_snapshot", {}),
            graph_diff=data.get("graph_diff", {}),
            resume_critical=data.get("resume_critical", ""),
            resume_standard=data.get("resume_standard", ""),
            resume_full=data.get("resume_full", ""),
            model=data.get("model", ""),
            next_action=data.get("next_action", ""),
        )
tokenmizer/cli.py ADDED
@@ -0,0 +1,153 @@
1
+ """TokenMizer CLI"""
2
+ from __future__ import annotations
3
+
4
+ from typing import Optional
5
+
6
+ import typer
7
+ from rich.console import Console
8
+ from rich.panel import Panel
9
+
10
# Root Typer application; the commands below register themselves on it
# through @app.command().
app = typer.Typer(
    add_completion=False,
    name="tokenmizer",
    help="🧠 TokenMizer — Never lose your AI context again.",
)

# Rich console shared by every command for styled terminal output.
console = Console()
16
+
17
+
18
@app.command()
def serve(
    host: str = typer.Option("0.0.0.0", help="Bind host"),
    port: int = typer.Option(8000, help="Bind port"),
    config: Optional[str] = typer.Option(None, help="Path to tokenmizer.yaml"),
    reload: bool = typer.Option(False, help="Auto-reload on code changes (dev only)"),
    workers: int = typer.Option(1, help="Number of worker processes"),
):
    """Start TokenMizer proxy + dashboard."""
    import os

    import uvicorn

    # Hand the config path to the server process through the environment.
    if config:
        os.environ["TOKENMIZER_CONFIG"] = config

    base_url = f"http://{host}:{port}"
    banner_lines = [
        "[bold green]🧠 TokenMizer[/bold green]",
        f"[dim]Proxy: {base_url}/v1/chat/completions[/dim]",
        f"[dim]Dashboard: {base_url}[/dim]",
        f"[dim]API Docs: {base_url}/docs[/dim]",
        f"[dim]Health: {base_url}/health[/dim]",
    ]
    console.print(Panel.fit("\n".join(banner_lines), border_style="green"))

    # When auto-reload is on, a single worker is always used.
    worker_count = 1 if reload else workers

    uvicorn.run(
        "tokenmizer.api.app:app",
        host=host,
        port=port,
        reload=reload,
        workers=worker_count,
        log_level="info",
    )
51
+
52
+
53
@app.command()
def stats(
    session_id: Optional[str] = typer.Argument(None, help="Session ID (optional)"),
    server: str = typer.Option("http://localhost:8000", help="TokenMizer server URL"),
    api_key: Optional[str] = typer.Option(None, envvar="TOKENMIZER_API_KEY"),
):
    """Print session/global analytics."""
    import httpx

    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # Let httpx build the query string so the session id is URL-encoded.
    params = {"session_id": session_id} if session_id else None

    try:
        r = httpx.get(f"{server}/api/stats", params=params, headers=headers, timeout=5)
    except Exception as e:
        console.print(f"[red]Cannot reach server: {e}[/red]")
        raise typer.Exit(1) from e

    # An error response (401, 500, ...) must not be rendered as all-zero stats.
    if r.status_code != 200:
        console.print(f"[red]Error: {r.text}[/red]")
        raise typer.Exit(1)

    try:
        data = r.json()
    except ValueError as e:
        console.print(f"[red]Invalid response from server: {e}[/red]")
        raise typer.Exit(1) from e

    d = data.get("daily", {})
    console.print(Panel.fit(
        f"[bold]Daily Stats[/bold]\n"
        f"[green]Requests: {d.get('requests', 0):,}[/green]\n"
        f"[green]Tokens saved: {d.get('tokens_saved', 0):,} ({d.get('savings_pct', 0):.1f}%)[/green]\n"
        f"[yellow]Cost saved: ${d.get('cost_saved_usd', 0):.4f}[/yellow]",
        border_style="green",
    ))
84
+
85
+
86
@app.command()
def checkpoint(
    session_id: str = typer.Argument(..., help="Session ID to checkpoint"),
    server: str = typer.Option("http://localhost:8000"),
    api_key: Optional[str] = typer.Option(None, envvar="TOKENMIZER_API_KEY"),
    level: str = typer.Option("standard", help="Resume level: critical | standard | full"),
):
    """Create a manual checkpoint and show resume context."""
    import httpx

    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        r = httpx.post(
            f"{server}/api/checkpoint",
            # Passed as params so the session id is URL-encoded.
            params={"session_id": session_id},
            headers=headers,
            timeout=30,
        )
    except Exception as e:
        # Same handling as `stats`: a clean message instead of a traceback.
        console.print(f"[red]Cannot reach server: {e}[/red]")
        raise typer.Exit(1) from e

    if r.status_code != 200:
        console.print(f"[red]Error: {r.text}[/red]")
        raise typer.Exit(1)

    data = r.json()

    # Honour --level: show the requested tier when the response carries it,
    # otherwise fall back to the standard block and label it as such so the
    # heading never claims a tier that is not actually displayed.
    resume_key = f"resume_{level}"
    if resume_key in data:
        shown_level, resume_text = level, data[resume_key]
    else:
        shown_level, resume_text = "standard", data.get("resume_standard", "")

    console.print(Panel.fit(
        f"[green]✅ Checkpoint created[/green]\n"
        f"[dim]ID: {data['checkpoint_id']}[/dim]\n"
        f"[dim]Nodes: {data.get('node_count', 0)}[/dim]\n"
        f"[dim]Resume tokens: {data['resume_tokens']}[/dim]\n\n"
        f"[bold]Resume context ({shown_level}):[/bold]\n"
        f"{resume_text}",
        border_style="green",
    ))
119
+
120
+
121
@app.command()
def resume(
    session_id: str = typer.Argument(..., help="Session ID to resume"),
    server: str = typer.Option("http://localhost:8000"),
    level: str = typer.Option("standard", help="critical | standard | full"),
    api_key: Optional[str] = typer.Option(None, envvar="TOKENMIZER_API_KEY"),
):
    """Get the resume context for a session checkpoint."""
    from urllib.parse import quote

    import httpx

    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        r = httpx.get(
            # The session id is a path segment: escape everything, "/" included.
            f"{server}/api/resume/{quote(session_id, safe='')}",
            params={"level": level},
            headers=headers,
            timeout=10,
        )
    except Exception as e:
        # Same handling as `stats`: a clean message instead of a traceback.
        console.print(f"[red]Cannot reach server: {e}[/red]")
        raise typer.Exit(1) from e

    if r.status_code == 404:
        console.print(f"[yellow]No checkpoint found for session: {session_id}[/yellow]")
        raise typer.Exit(1)
    # Any other failure (401, 500, ...) would otherwise surface as a KeyError
    # or JSON decoding error below.
    if r.status_code != 200:
        console.print(f"[red]Error: {r.text}[/red]")
        raise typer.Exit(1)

    data = r.json()
    console.print(Panel(
        data["resume_context"],
        title=f"[green]Resume — {session_id[:16]}... ({data['token_count']} tokens)[/green]",
        border_style="green",
    ))
150
+
151
+
152
# Run the Typer app when this module is executed directly as a script.
if __name__ == "__main__":
    app()
File without changes