tokenmizer 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. tokenmizer/__init__.py +21 -0
  2. tokenmizer/agents/__init__.py +0 -0
  3. tokenmizer/analytics/__init__.py +0 -0
  4. tokenmizer/analytics/engine.py +188 -0
  5. tokenmizer/api/__init__.py +0 -0
  6. tokenmizer/api/app.py +958 -0
  7. tokenmizer/api/rate_limiter.py +110 -0
  8. tokenmizer/checkpoints/__init__.py +0 -0
  9. tokenmizer/checkpoints/manager.py +383 -0
  10. tokenmizer/cli.py +153 -0
  11. tokenmizer/compression/__init__.py +0 -0
  12. tokenmizer/compression/engine.py +669 -0
  13. tokenmizer/compression/output_trimmer.py +95 -0
  14. tokenmizer/compression/window.py +104 -0
  15. tokenmizer/config/__init__.py +0 -0
  16. tokenmizer/config/settings.py +170 -0
  17. tokenmizer/core/__init__.py +0 -0
  18. tokenmizer/core/dto.py +196 -0
  19. tokenmizer/core/errors.py +35 -0
  20. tokenmizer/core/tokenizer.py +96 -0
  21. tokenmizer/dashboard/__init__.py +0 -0
  22. tokenmizer/dashboard/page.py +267 -0
  23. tokenmizer/filters/__init__.py +0 -0
  24. tokenmizer/filters/file_intelligence.py +960 -0
  25. tokenmizer/graph_memory/__init__.py +0 -0
  26. tokenmizer/graph_memory/decision_tracker.py +225 -0
  27. tokenmizer/graph_memory/graph.py +1287 -0
  28. tokenmizer/graph_memory/helpers.py +121 -0
  29. tokenmizer/graph_memory/hybrid_extractor.py +703 -0
  30. tokenmizer/graph_memory/types.py +134 -0
  31. tokenmizer/graph_memory/validator.py +304 -0
  32. tokenmizer/graph_memory/visualization.py +228 -0
  33. tokenmizer/mcp/__init__.py +0 -0
  34. tokenmizer/mcp/server.py +368 -0
  35. tokenmizer/providers/__init__.py +0 -0
  36. tokenmizer/providers/providers.py +456 -0
  37. tokenmizer/security/__init__.py +0 -0
  38. tokenmizer/security/auth.py +95 -0
  39. tokenmizer/security/middleware.py +138 -0
  40. tokenmizer/security/redaction.py +126 -0
  41. tokenmizer/semantic_cache/__init__.py +0 -0
  42. tokenmizer/semantic_cache/cache.py +383 -0
  43. tokenmizer/state/__init__.py +0 -0
  44. tokenmizer/state/backend.py +137 -0
  45. tokenmizer/storage/__init__.py +56 -0
  46. tokenmizer-0.2.4.dist-info/METADATA +529 -0
  47. tokenmizer-0.2.4.dist-info/RECORD +50 -0
  48. tokenmizer-0.2.4.dist-info/WHEEL +4 -0
  49. tokenmizer-0.2.4.dist-info/entry_points.txt +2 -0
  50. tokenmizer-0.2.4.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,95 @@
1
+ """
2
+ Output Trimmer — removes LLM verbosity without touching information.
3
+
4
+ LLMs (especially frontier models) have trained-in habits that waste tokens:
5
+ - "Certainly! I'd be happy to help with that." (+8 tokens, zero info)
6
+ - "In summary, ..." at the end (restates what was just said)
7
+ - "Let me know if you need anything else!" (+10 tokens every response)
8
+ - Excessive caveats and disclaimers on simple tasks
9
+
10
+ This trimmer removes ONLY structural filler — never content.
11
+ Average savings: 5-15% on verbose models (GPT-5.5, Gemini 3.1 Pro).
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import re
16
+
17
+ from tokenmizer.core.tokenizer import count_tokens
18
+
19
+ # ── Filler patterns ───────────────────────────────────────────────────────────
20
+ # Ordered: most specific first
21
+
22
# Opening pleasantries, tried in order with at most one substitution each.
#
# Every pattern is anchored to the very start of the response (``\A`` plus
# optional leading whitespace). ``^`` with ``re.MULTILINE`` is deliberately NOT
# used: it matches at the start of *any* line, so a content line further down
# such as "Sure enough, the test fails." would lose its first word.
#
# The interjection patterns also require the trailing punctuation
# ("Certainly!", "Sure,") so that real sentences which merely start with the
# same word ("Certainly not.", "Absolutely no changes are needed.") are left
# untouched.
_OPENING_FILLERS = [
    re.compile(r"\A\s*Certainly[!,.]\s+", re.IGNORECASE),
    re.compile(r"\A\s*Of course[!,.]\s+", re.IGNORECASE),
    re.compile(r"\A\s*Absolutely[!,.]\s+", re.IGNORECASE),
    re.compile(r"\A\s*Sure[!,.]\s+", re.IGNORECASE),
    re.compile(r"\A\s*Great question[!,.]\s+", re.IGNORECASE),
    re.compile(
        r"\A\s*That's? (?:a )?(?:great|good|excellent|interesting) question[!,.]\s+",
        re.IGNORECASE,
    ),
    # Whole-sentence fillers: consumed up to and including the first period
    # on the same line.
    re.compile(r"\A\s*I(?:'d| would) be happy to (?:help|assist)[^.\n]*\.\s*", re.IGNORECASE),
    re.compile(r"\A\s*I(?:'d| would) love to (?:help|assist)[^.\n]*\.\s*", re.IGNORECASE),
    re.compile(r"\A\s*I understand(?: that)? you(?:'re| are)[^.\n]*\.\s*", re.IGNORECASE),
    re.compile(
        r"\A\s*Thank you for (?:your )?(?:question|asking|reaching out)[^.\n]*\.\s*",
        re.IGNORECASE,
    ),
]
35
+
36
# Sign-off lines. Each pattern needs at least one newline before the phrase
# and must run to the end of the text (no re.MULTILINE, so ``$`` is the end of
# the string or just before a final newline).
_CLOSING_FILLERS = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"\n+Let me know if (?:you(?:'d like| need| have))[^.!?]*[.!?]\s*$",
        r"\n+Feel free to (?:ask|reach out)[^.!?]*[.!?]\s*$",
        r"\n+(?:Don't hesitate|Please don't hesitate) to (?:ask|reach out)[^.!?]*[.!?]\s*$",
        r"\n+Is there anything (?:else|more)[^.!?]*[.!?]\s*$",
        r"\n+Hope (?:this|that) helps?[.!?]\s*$",
        r"\n+I hope (?:this|that) (?:answer|explanation|helps?)[^.!?]*[.!?]\s*$",
    )
]
44
+
45
# Mid-response redundancies: a short (<= 200 chars on one line) recap
# introduced by a summary phrase, plus one boilerplate "Note: This code ..."
# disclaimer shape. Each match includes its surrounding newlines.
_INLINE_REDUNDANCIES = [
    re.compile(source, re.IGNORECASE)
    for source in (
        # "In summary, ..." style recaps that restate the answer
        r"\n+In summary[,:]?\s*[^\n]{0,200}\n+",
        r"\n+To summarize[,:]?\s*[^\n]{0,200}\n+",
        r"\n+In conclusion[,:]?\s*[^\n]{0,200}\n+",
        r"\n+To recap[,:]?\s*[^\n]{0,200}\n+",
        # Excessive disclaimer on simple code/math tasks
        r"\n+Note: This (?:code|implementation|solution) (?:is|should be) (?:tested|reviewed)[^.]*\.\s*\n",
    )
]
54
+
55
+
56
class OutputTrimmer:
    """Applies the module's filler regex tables to an LLM response."""

    def trim(self, text: str, level: str = "standard") -> tuple[str, int]:
        """
        Strip structural filler from an LLM response.

        Args:
            text: raw LLM response
            level: "lite" runs only the opening-filler pass; "standard" adds
                the closing-filler pass; "aggressive" adds the
                inline-redundancy pass on top. Any other value gets the
                opening pass only.

        Returns:
            (trimmed_text, tokens_saved). tokens_saved is clamped at 0.
            Empty input, or input shorter than 20 characters, is returned
            unchanged with 0 saved.
        """
        if not text or len(text) < 20:
            return text, 0

        tokens_before = count_tokens(text)

        # Each pass: (pattern table, replacement, max substitutions per
        # pattern — 0 means "replace every match").
        passes = [(_OPENING_FILLERS, "", 1)]
        if level in ("standard", "aggressive"):
            passes.append((_CLOSING_FILLERS, "", 0))
        if level == "aggressive":
            # Inline redundancies only on aggressive — risky otherwise
            passes.append((_INLINE_REDUNDANCIES, "\n\n", 0))

        trimmed = text
        for patterns, replacement, limit in passes:
            for pattern in patterns:
                trimmed = pattern.sub(replacement, trimmed, count=limit)

        # Tidy up: trim the edges, then collapse runs of 3+ newlines.
        trimmed = re.sub(r"\n{3,}", "\n\n", trimmed.strip()).strip()

        tokens_after = count_tokens(trimmed)
        return trimmed, max(0, tokens_before - tokens_after)
@@ -0,0 +1,104 @@
1
+ """
2
+ Smart Message Window — kills the biggest token drain in long sessions.
3
+
4
+ Problem:
5
+ In a 50-turn session, turns 1-40 are sent verbatim EVERY single turn.
6
+ 50 turns × avg 300 tokens/turn = 15,000 tokens repeated each time.
7
+ At Opus 4.8 pricing ($5/M): 15,000 × 50 turns = 750K tokens = $3.75
8
+ just in conversation history repetition.
9
+
10
+ Solution:
11
+ Keep the last N turns verbatim (recent context).
12
+ Replace older turns with the graph memory context block.
13
+ The graph has the important information — tasks, decisions, files.
14
+ The LLM doesn't need the full conversation text to know what was done.
15
+
16
+ Quality guarantee:
17
+ - System messages always preserved
18
+ - Last N turns always verbatim (configurable, default 8)
19
+ - Graph context is accurate (SQLite-backed, not ephemeral)
20
+ - No hallucination risk: graph only contains extracted facts
21
+ """
22
+ from __future__ import annotations
23
+
24
+ import logging
25
+ from typing import TYPE_CHECKING
26
+
27
+ from tokenmizer.core.tokenizer import count_messages_tokens
28
+
29
+ if TYPE_CHECKING:
30
+ from tokenmizer.graph_memory.graph import GraphMemory
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
class SmartMessageWindow:
    """Replaces older conversation turns with a graph-memory context block.

    System messages and the most recent ``protect_recent`` non-system
    messages are kept verbatim; everything older is swapped for a single
    synthetic system message built from ``graph.to_context_block(...)``.
    """

    def __init__(
        self,
        token_budget: int = 4000,
        protect_recent: int = 8,
        graph_context_budget: int = 250,
    ):
        """
        Args:
            token_budget: windowing is only attempted above this token count.
            protect_recent: number of trailing non-system messages that are
                always kept verbatim.
            graph_context_budget: token budget handed to
                ``graph.to_context_block``.
        """
        self.token_budget = token_budget
        self.protect_recent = protect_recent
        self.graph_context_budget = graph_context_budget

    def apply(
        self,
        messages: list[dict],
        graph: "GraphMemory",
        model: str = "gpt-4o",
    ) -> tuple[list[dict], int]:
        """
        Apply smart windowing to messages.

        The input list is never mutated. The original list object is returned
        (with 0 saved) when the conversation already fits the budget, when
        there is nothing older than the protected tail, or when the windowed
        form would not actually be smaller.

        Returns:
            (windowed_messages, tokens_saved)
        """
        current_tokens = count_messages_tokens(messages, model)

        if current_tokens <= self.token_budget:
            return messages, 0  # fits — don't touch

        system_msgs = [m for m in messages if m.get("role") == "system"]
        conv_msgs = [m for m in messages if m.get("role") != "system"]

        # Negative values make no sense as a count; treat them as 0.
        keep = max(0, self.protect_recent)
        if len(conv_msgs) <= keep:
            return messages, 0  # not enough history to window

        # Split by explicit index. Slicing with a negated count breaks for 0:
        # conv_msgs[-0:] is the WHOLE list and conv_msgs[:-0] is empty, which
        # would keep everything and still prepend a bridge message.
        split_at = len(conv_msgs) - keep
        old = conv_msgs[:split_at]
        recent = conv_msgs[split_at:]

        # Build graph context to replace old turns
        graph_ctx = graph.to_context_block(token_budget=self.graph_context_budget)

        bridge_parts = []
        if graph_ctx:
            bridge_parts.append(f"[Session context from earlier conversation]\n{graph_ctx}")

        # Add a note about what's omitted
        bridge_parts.append(
            f"[{len(old)} earlier messages omitted — key information preserved above]"
        )

        bridge_msg = {
            "role": "system",
            "content": "\n\n".join(bridge_parts),
        }

        windowed = system_msgs + [bridge_msg] + recent
        windowed_tokens = count_messages_tokens(windowed, model)
        saved = current_tokens - windowed_tokens

        if saved <= 0:
            # The bridge costs at least as much as the turns it replaces.
            # Swapping would drop verbatim history for no gain, so keep the
            # original conversation.
            logger.info(
                "SmartWindow: windowing would not reduce tokens (%s→%s); "
                "keeping original messages",
                current_tokens,
                windowed_tokens,
            )
            return messages, 0

        logger.info(
            "SmartWindow: %d old turns compressed → %s→%s tokens (saved %s)",
            len(old),
            current_tokens,
            windowed_tokens,
            saved,
        )

        return windowed, saved
100
+
101
+
102
def needs_windowing(messages: list[dict], token_budget: int, model: str = "gpt-4o") -> bool:
    """Return True when the messages' token count for `model` exceeds `token_budget`."""
    total_tokens = count_messages_tokens(messages, model)
    return total_tokens > token_budget
File without changes
@@ -0,0 +1,170 @@
1
+ """TokenMizer configuration — Pydantic Settings with env var support."""
2
+ from __future__ import annotations
3
+
4
+ from typing import List, Literal
5
+
6
+ from pydantic import Field
7
+ from pydantic_settings import BaseSettings, SettingsConfigDict
8
+
9
+
10
class CompressionSettings(BaseSettings):
    """Prompt-compression options (the ``compression`` section of Settings)."""

    # This class is a BaseSettings, so it reads the process environment every
    # time it is constructed — including when Settings builds it through
    # default_factory. Without a prefix it would pick up bare variables such
    # as ENABLED, ENGINE or RATIO. The prefix below mirrors the parent's
    # TOKENMIZER_ + "__" nested naming so only TOKENMIZER_COMPRESSION__* is
    # ever consulted.
    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_COMPRESSION__")

    enabled: bool = True
    engine: Literal["llmlingua2", "heuristic", "none"] = "heuristic"
    ratio: float = Field(default=0.5, ge=0.1, le=1.0)  # validated to [0.1, 1.0]
    min_tokens_to_compress: int = 300
15
+
16
+
17
class MemorySettings(BaseSettings):
    """Conversation-memory options (the ``memory`` section of Settings)."""

    # BaseSettings reads the environment on construction; without a prefix a
    # bare ENABLED variable would silently override the default here. Scope
    # lookups to TOKENMIZER_MEMORY__* to match the parent's nested naming.
    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_MEMORY__")

    enabled: bool = True
    max_tokens_before_summary: int = 4000
    recent_turns_verbatim: int = 10
21
+
22
+
23
class GraphCheckpointSettings(BaseSettings):
    """Graph checkpoint options (the ``graph_checkpoint`` section of Settings)."""

    # BaseSettings reads the environment on construction; without a prefix,
    # bare variables such as ENABLED or STORAGE_DIR would be picked up. Scope
    # lookups to TOKENMIZER_GRAPH_CHECKPOINT__* to match the parent's nested
    # naming.
    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_GRAPH_CHECKPOINT__")

    enabled: bool = True
    trigger_at_percent: float = Field(default=0.85, ge=0.5, le=0.99)  # validated to [0.5, 0.99]
    storage_dir: str = "./checkpoints"
    max_resume_tokens: int = 400
    use_llm_extraction: bool = False  # set True for 80%+ recall (needs API key, ~$0.001/turn)
    extraction_model: str = ""  # leave empty = auto-pick cheapest model for your provider
    min_confidence: float = 0.65  # minimum validation confidence threshold
31
+
32
+
33
class RoutingSettings(BaseSettings):
    """Model-routing options (the ``routing`` section of Settings). Off by default."""

    # BaseSettings reads the environment on construction; without a prefix a
    # bare ENABLED variable would flip routing on or off. Scope lookups to
    # TOKENMIZER_ROUTING__* to match the parent's nested naming.
    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_ROUTING__")

    enabled: bool = False
    simple_model: str = "claude-haiku-4-5"
    medium_model: str = "claude-sonnet-4-6"
    # NOTE(review): same default as medium_model — confirm this is intended.
    complex_model: str = "claude-sonnet-4-6"
    complexity_threshold: float = 0.6
39
+
40
+
41
class CacheSettings(BaseSettings):
    """Semantic-cache options (the ``cache`` section of Settings)."""

    # BaseSettings reads the environment on construction; without a prefix,
    # bare variables such as ENABLED or MAX_SIZE would be picked up. Scope
    # lookups to TOKENMIZER_CACHE__* to match the parent's nested naming.
    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_CACHE__")

    enabled: bool = True
    similarity_threshold: float = 0.92
    ttl_seconds: int = 3600
    max_size: int = 10_000
46
+
47
+
48
class TerseOutputSettings(BaseSettings):
    """Terse-output options (the ``terse_output`` section of Settings)."""

    # BaseSettings reads the environment on construction. Without a prefix, a
    # common variable like LEVEL=debug would be matched against the `level`
    # field below and fail Literal validation. Scope lookups to
    # TOKENMIZER_TERSE_OUTPUT__* to match the parent's nested naming.
    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_TERSE_OUTPUT__")

    enabled: bool = True
    # NOTE(review): OutputTrimmer.trim() documents its levels as
    # "lite" | "standard" | "aggressive"; confirm how these values map onto
    # it (if they are related at all).
    level: Literal["lite", "full", "ultra"] = "full"
51
+
52
+
53
class Settings(BaseSettings):
    """Top-level TokenMizer configuration.

    Values come from constructor keyword arguments, ``TOKENMIZER_``-prefixed
    environment variables (nested fields use ``__``, e.g.
    ``TOKENMIZER_COMPRESSION__RATIO``) and a local ``.env`` file. Unknown keys
    are ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(
        env_prefix="TOKENMIZER_",
        env_nested_delimiter="__",
        env_file=".env",
        extra="ignore",
    )

    # Provider.
    # NOTE(review): the original comment said this list is "synced exactly
    # with providers/registry.py" — confirm that path; keep the list aligned
    # with whichever module actually registers providers.
    provider: Literal[
        "anthropic", "claude",
        "openai", "gpt",
        "deepseek",
        "mistral",
        "grok",
        "cohere",
        "gemini",
        "ollama",
        "openrouter",
    ] = "anthropic"

    default_model: str = "claude-sonnet-4-6"

    # API keys (prefer env vars over config file)
    anthropic_api_key: str = ""
    openai_api_key: str = ""
    gemini_api_key: str = ""
    grok_api_key: str = ""
    deepseek_api_key: str = ""
    mistral_api_key: str = ""
    cohere_api_key: str = ""
    openrouter_api_key: str = ""

    # State backend
    state_backend: Literal["memory", "redis"] = "memory"
    redis_url: str = "redis://localhost:6379/0"

    # Auth
    api_key: str = ""  # TOKENMIZER_API_KEY — empty = dev mode (no auth)

    # CORS
    cors_origins: List[str] = ["http://localhost:3000", "http://localhost:8000"]

    # Sub-configs
    compression: CompressionSettings = Field(default_factory=CompressionSettings)
    memory: MemorySettings = Field(default_factory=MemorySettings)
    graph_checkpoint: GraphCheckpointSettings = Field(default_factory=GraphCheckpointSettings)
    routing: RoutingSettings = Field(default_factory=RoutingSettings)
    cache: CacheSettings = Field(default_factory=CacheSettings)
    terse_output: TerseOutputSettings = Field(default_factory=TerseOutputSettings)

    # Server
    # NOTE(review): binds all interfaces by default while api_key defaults to
    # "no auth" — confirm this combination is acceptable for your deployment.
    proxy_host: str = "0.0.0.0"
    proxy_port: int = 8000

    def get_api_key_for_provider(self, provider: str) -> str:
        """Return the configured API key for `provider`.

        Aliases share a key ("claude" → anthropic, "gpt" → openai). "ollama"
        and any unrecognised provider name yield an empty string.
        """
        mapping = {
            "anthropic": self.anthropic_api_key,
            "claude": self.anthropic_api_key,
            "openai": self.openai_api_key,
            "gpt": self.openai_api_key,
            "gemini": self.gemini_api_key,
            "grok": self.grok_api_key,
            "deepseek": self.deepseek_api_key,
            "mistral": self.mistral_api_key,
            "cohere": self.cohere_api_key,
            "openrouter": self.openrouter_api_key,
            "ollama": "",
        }
        return mapping.get(provider, "")

    @classmethod
    def from_yaml(cls, path: str) -> "Settings":
        """Build a Settings instance from the YAML file at `path`.

        The file's top-level mapping is passed as constructor keyword
        arguments; an empty file yields an instance with no overrides. Errors
        from opening, parsing or validating the file propagate to the caller.
        """
        import yaml

        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
        return cls(**data)
130
+
131
+
132
# Process-wide cached Settings instance; populated on the first get_settings() call.
_settings: Settings | None = None


def get_settings() -> Settings:
    """Return the cached Settings, loading it on first use.

    The YAML path comes from the TOKENMIZER_CONFIG environment variable
    (default "tokenmizer.yaml"). If no file exists at that path, plain
    `Settings()` is used. If the file exists but cannot be loaded, the failure
    is logged at error level and `Settings()` defaults are used instead.
    """
    global _settings
    if _settings is not None:
        return _settings

    import logging
    import os

    log = logging.getLogger(__name__)
    config_path = os.environ.get("TOKENMIZER_CONFIG", "tokenmizer.yaml")

    if not os.path.exists(config_path):
        _settings = Settings()
        return _settings

    try:
        _settings = Settings.from_yaml(config_path)
    except Exception as exc:
        # Deliberately loud. A broken config file used to be discarded
        # silently, leaving the process on hardcoded defaults with no hint
        # that anything was wrong. Those defaults are dev-mode-permissive:
        # no API key required, CORS possibly wider than intended, in-memory
        # state instead of Redis. An operator who configured
        # security-relevant fields such as `api_key` or `cors_origins` could
        # therefore be running with none of them applied. Logging at error
        # level makes a typo in the YAML visible at startup, rather than
        # being discovered later as unexpectedly unauthenticated requests.
        log.error(
            f"Failed to load config from {config_path}: {exc}. "
            "Falling back to hardcoded defaults — this means any "
            "settings in your YAML file (including api_key, "
            "cors_origins, state_backend) are NOT applied. Fix the "
            "YAML file and restart."
        )
        _settings = Settings()
    return _settings
File without changes
tokenmizer/core/dto.py ADDED
@@ -0,0 +1,196 @@
1
+ """
2
+ DTOs — typed data transfer objects for every layer boundary.
3
+
4
+ Rule: no raw dict crosses a layer boundary.
5
+ Each module owns its output DTO. Callers unpack what they need.
6
+
7
+ tokenmizer/core/dto.py
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass
12
+
13
+ # ── Graph layer ───────────────────────────────────────────────────────────────
14
+
15
@dataclass(frozen=True)
class GraphNodeDTO:
    """Immutable transfer record for a single graph node."""

    # Identity and classification
    id: str
    type: str
    label: str
    status: str
    # Free-text description
    summary: str
    # Numeric scores / age
    importance: float
    confidence: float
    age_days: float
25
+
26
+
27
@dataclass(frozen=True)
class GraphEdgeDTO:
    """Immutable transfer record for a single graph edge."""

    # Endpoints, referenced by node id
    source_id: str
    target_id: str
    # Edge classification and weight
    type: str
    weight: float
33
+
34
+
35
@dataclass(frozen=True)
class GraphStatsDTO:
    """Immutable per-session graph statistics, including two health indicators."""

    session_id: str
    node_count: int
    edge_count: int
    by_type: dict
    by_status: dict
    processed_messages: int
    avg_confidence: float
    # Count of decision-contradiction-check failures (the logic that tracks
    # "Changed X → Y" in resume context). These used to be swallowed at debug
    # level with no visibility. A non-zero value means that feature is
    # degraded even though node creation itself kept working.
    decision_tracking_failures: int = 0
    # True when SQLite could not be initialized for this session: the graph
    # is running in-memory only, with no durable persistence, and a restart
    # loses everything. This condition used to appear only in logs.
    persistence_broken: bool = False
53
+
54
+
55
+ # ── Checkpoint layer ──────────────────────────────────────────────────────────
56
+
57
@dataclass(frozen=True)
class CheckpointSummaryDTO:
    """Lightweight checkpoint record intended for list endpoints."""

    # Identity
    checkpoint_id: str
    session_id: str
    # Creation metadata
    created_at: float
    context_pct: float
    trigger: str
    # Size figures
    message_count: int
    resume_tokens: int
67
+
68
+
69
@dataclass(frozen=True)
class ResumeDTO:
    """Immutable resume payload: the resume context text plus its identifiers and counts."""

    # Identity
    session_id: str
    checkpoint_id: str
    level: str
    # Payload
    resume_context: str
    # Size figures
    token_count: int
    node_count: int
77
+
78
+
79
+ # ── Provider layer ────────────────────────────────────────────────────────────
80
+
81
@dataclass
class LLMResponseDTO:
    """Mutable record of one LLM provider response with its usage figures."""

    # Required: response text, token usage and origin
    text: str
    input_tokens: int
    output_tokens: int
    model: str
    provider: str
    # Optional metadata
    latency_ms: float = 0.0
    finish_reason: str = "stop"
    cached: bool = False
    cost_usd: float = 0.0

    @property
    def total_tokens(self) -> int:
        """Input tokens plus output tokens."""
        prompt_side = self.input_tokens
        completion_side = self.output_tokens
        return prompt_side + completion_side
96
+
97
+
98
+ # ── Compression layer ─────────────────────────────────────────────────────────
99
+
100
@dataclass(frozen=True)
class CompressionResultDTO:
    """Immutable outcome of one compression attempt."""

    # Token accounting
    original_tokens: int
    compressed_tokens: int
    tokens_saved: int
    # 0–1; if < threshold, original was used
    quality_score: float
    strategy_used: str
    was_compressed: bool
108
+
109
+
110
@dataclass(frozen=True)
class OutputTrimResultDTO:
    """Immutable outcome of one output-trim pass: token accounting plus the text."""

    # Token accounting
    original_tokens: int
    trimmed_tokens: int
    tokens_saved: int
    # Text carried with the result
    text: str
116
+
117
+
118
+ # ── Cache layer ───────────────────────────────────────────────────────────────
119
+
120
@dataclass(frozen=True)
class CacheStatsDTO:
    """Immutable snapshot of cache occupancy and hit/miss counters."""

    # Occupancy
    entries: int
    max_size: int
    utilization_pct: float
    evictions: int
    # Hit/miss accounting
    hit_rate: float
    hit_exact: int
    hit_semantic: int
    miss: int
    # Capability flag
    semantic_available: bool
131
+
132
+
133
+ # ── File intelligence layer ───────────────────────────────────────────────────
134
+
135
@dataclass(frozen=True)
class FileExtractionDTO:
    """Immutable outcome of extracting one file: sizes, savings and the extracted text."""

    # Source file
    file_type: str
    filename: str
    original_size_bytes: int
    # Token accounting
    original_tokens: int
    extracted_tokens: int
    tokens_saved: int
    savings_pct: float
    # Extracted payload
    content: str
    summary: str
    # How the extraction was done
    strategy_used: str
    was_truncated: bool
148
+
149
+
150
+ # ── Analytics layer ───────────────────────────────────────────────────────────
151
+
152
@dataclass(frozen=True)
class PeriodStatsDTO:
    """Immutable savings statistics for one reporting period."""

    # "daily" | "weekly" | "monthly"
    period: str
    requests: int
    # Savings figures
    tokens_saved: int
    savings_pct: float
    cost_saved_usd: float
159
+
160
+
161
@dataclass(frozen=True)
class AnalyticsSummaryDTO:
    """Immutable analytics roll-up: per-period stats, breakdowns and suggestions."""

    total_requests: int
    # One PeriodStatsDTO per reporting window
    daily: PeriodStatsDTO
    weekly: PeriodStatsDTO
    monthly: PeriodStatsDTO
    # Breakdowns
    layer_breakdown: dict
    by_provider: dict
    # Suggestion strings
    suggestions: list[str]
170
+
171
+
172
+ # ── Chat API layer ─────────────────────────────────────────────────────────────
173
+
174
@dataclass
class ChatSavingsDTO:
    """Mutable per-request tally of tokens saved, one counter per layer."""

    file_extraction: int = 0
    compression: int = 0
    output_trim: int = 0
    cache: int = 0
    windowing: int = 0
    routing: int = 0

    @property
    def total(self) -> int:
        """Sum of all six counters."""
        return sum(self.to_dict().values())

    def to_dict(self) -> dict:
        """Return the six counters as a plain dict keyed by field name."""
        layer_names = (
            "file_extraction",
            "compression",
            "output_trim",
            "cache",
            "windowing",
            "routing",
        )
        return {layer: getattr(self, layer) for layer in layer_names}
@@ -0,0 +1,35 @@
1
+ """Shared error types for TokenMizer."""
2
+ from __future__ import annotations
3
+
4
+
5
class TokenMizerError(Exception):
    """Root of the TokenMizer exception hierarchy."""
7
+
8
+
9
class ProviderError(TokenMizerError):
    """Failure reported for an LLM provider, carrying retry hints.

    The exception message is rendered as "[provider] error_type: message".
    `provider`, `error_type`, `retryable` and `retry_after` are kept as
    attributes; the raw `message` only appears inside the rendered text.
    """

    def __init__(self, provider: str, error_type: str, message: str,
                 retryable: bool = False, retry_after: float = 0.0):
        rendered = f"[{provider}] {error_type}: {message}"
        super().__init__(rendered)
        self.provider = provider
        self.error_type = error_type
        self.retryable = retryable
        self.retry_after = retry_after
17
+
18
+
19
class ConfigError(TokenMizerError):
    """Raised for invalid configuration."""
21
+
22
+
23
class StorageError(TokenMizerError):
    """Raised when persistence fails; base class for the more specific persist errors."""
25
+
26
+
27
class CheckpointPersistError(StorageError):
    """A checkpoint write failed.

    Callers MUST NOT treat the checkpoint as created when this is raised.
    There is no fallback write path, so swallowing this error means data
    loss.
    """
31
+
32
+
33
class GraphPersistError(StorageError):
    """A graph (node/edge) write failed.

    Carries the same data-loss caveat as CheckpointPersistError — see that
    class's docstring.
    """