PyPI - tokenmizer - Versions diffs - 0.2.4__py3-none-any.whl - Mend

tokenmizer 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

tokenmizer/__init__.py +21 -0
tokenmizer/agents/__init__.py +0 -0
tokenmizer/analytics/__init__.py +0 -0
tokenmizer/analytics/engine.py +188 -0
tokenmizer/api/__init__.py +0 -0
tokenmizer/api/app.py +958 -0
tokenmizer/api/rate_limiter.py +110 -0
tokenmizer/checkpoints/__init__.py +0 -0
tokenmizer/checkpoints/manager.py +383 -0
tokenmizer/cli.py +153 -0
tokenmizer/compression/__init__.py +0 -0
tokenmizer/compression/engine.py +669 -0
tokenmizer/compression/output_trimmer.py +95 -0
tokenmizer/compression/window.py +104 -0
tokenmizer/config/__init__.py +0 -0
tokenmizer/config/settings.py +170 -0
tokenmizer/core/__init__.py +0 -0
tokenmizer/core/dto.py +196 -0
tokenmizer/core/errors.py +35 -0
tokenmizer/core/tokenizer.py +96 -0
tokenmizer/dashboard/__init__.py +0 -0
tokenmizer/dashboard/page.py +267 -0
tokenmizer/filters/__init__.py +0 -0
tokenmizer/filters/file_intelligence.py +960 -0
tokenmizer/graph_memory/__init__.py +0 -0
tokenmizer/graph_memory/decision_tracker.py +225 -0
tokenmizer/graph_memory/graph.py +1287 -0
tokenmizer/graph_memory/helpers.py +121 -0
tokenmizer/graph_memory/hybrid_extractor.py +703 -0
tokenmizer/graph_memory/types.py +134 -0
tokenmizer/graph_memory/validator.py +304 -0
tokenmizer/graph_memory/visualization.py +228 -0
tokenmizer/mcp/__init__.py +0 -0
tokenmizer/mcp/server.py +368 -0
tokenmizer/providers/__init__.py +0 -0
tokenmizer/providers/providers.py +456 -0
tokenmizer/security/__init__.py +0 -0
tokenmizer/security/auth.py +95 -0
tokenmizer/security/middleware.py +138 -0
tokenmizer/security/redaction.py +126 -0
tokenmizer/semantic_cache/__init__.py +0 -0
tokenmizer/semantic_cache/cache.py +383 -0
tokenmizer/state/__init__.py +0 -0
tokenmizer/state/backend.py +137 -0
tokenmizer/storage/__init__.py +56 -0
tokenmizer-0.2.4.dist-info/METADATA +529 -0
tokenmizer-0.2.4.dist-info/RECORD +50 -0
tokenmizer-0.2.4.dist-info/WHEEL +4 -0
tokenmizer-0.2.4.dist-info/entry_points.txt +2 -0
tokenmizer-0.2.4.dist-info/licenses/LICENSE +21 -0

tokenmizer/compression/output_trimmer.py ADDED Viewed

@@ -0,0 +1,95 @@
+"""
+Output Trimmer — removes LLM verbosity without touching information.
+LLMs (especially frontier models) have trained-in habits that waste tokens:
+  - "Certainly! I'd be happy to help with that."  (+8 tokens, zero info)
+  - "In summary, ..." at the end (restates what was just said)
+  - "Let me know if you need anything else!" (+10 tokens every response)
+  - Excessive caveats and disclaimers on simple tasks
+This trimmer removes ONLY structural filler — never content.
+Average savings: 5-15% on verbose models (GPT-5.5, Gemini 3.1 Pro).
+"""
+from __future__ import annotations
+import re
+from tokenmizer.core.tokenizer import count_tokens
+# ── Filler patterns ───────────────────────────────────────────────────────────
+# Ordered: most specific first
+# Openers are anchored with \A (after optional leading whitespace), so only the
+# very beginning of the response can match; a later line that merely starts
+# with "Sure" or "Certainly" is content and is left alone.
+# One-word openers require trailing punctuation: "Certainly!" is filler, while
+# "Certainly not" and "Sure enough" are the start of a real sentence.
+_OPENING_FILLERS = [
+    re.compile(r"\A\s*Certainly[!,.]\s+", re.IGNORECASE),
+    re.compile(r"\A\s*Of course[!,.]\s+", re.IGNORECASE),
+    re.compile(r"\A\s*Absolutely[!,.]\s+", re.IGNORECASE),
+    re.compile(r"\A\s*Sure[!,.]\s+", re.IGNORECASE),
+    re.compile(r"\A\s*Great question[!,.]\s+", re.IGNORECASE),
+    re.compile(r"\A\s*That's? (?:a )?(?:great|good|excellent|interesting) question[!,.]?\s+", re.IGNORECASE),
+    # Sentence-style openers stop at the first terminator (., ! or ?) on the
+    # line, so "I'd be happy to help! <answer>." loses only its first sentence.
+    re.compile(r"\A\s*I(?:'d| would) be happy to (?:help|assist)[^.!?\n]*[.!?]\s*", re.IGNORECASE),
+    re.compile(r"\A\s*I(?:'d| would) love to (?:help|assist)[^.!?\n]*[.!?]\s*", re.IGNORECASE),
+    re.compile(r"\A\s*I understand(?: that)? you(?:'re| are)[^.!?\n]*[.!?]\s*", re.IGNORECASE),
+    re.compile(r"\A\s*Thank you for (?:your )?(?:question|asking|reaching out)[^.!?\n]*[.!?]\s*", re.IGNORECASE),
+]
+# Sign-offs: each pattern only matches when the sentence is the last thing in
+# the text ($ without MULTILINE) and is preceded by at least one newline.
+_CLOSING_FILLERS = [
+    re.compile(r"\n+Let me know if (?:you(?:'d like| need| have))[^.!?]*[.!?]\s*$", re.IGNORECASE),
+    re.compile(r"\n+Feel free to (?:ask|reach out)[^.!?]*[.!?]\s*$", re.IGNORECASE),
+    re.compile(r"\n+(?:Don't hesitate|Please don't hesitate) to (?:ask|reach out)[^.!?]*[.!?]\s*$", re.IGNORECASE),
+    re.compile(r"\n+Is there anything (?:else|more)[^.!?]*[.!?]\s*$", re.IGNORECASE),
+    re.compile(r"\n+Hope (?:this|that) helps?[.!?]\s*$", re.IGNORECASE),
+    re.compile(r"\n+I hope (?:this|that) (?:answer|explanation|helps?)[^.!?]*[.!?]\s*$", re.IGNORECASE),
+]
+_INLINE_REDUNDANCIES = [
+    # "In summary, ..." paragraphs that just restate the answer.
+    # Only spaces/tabs may follow the lead-in: with \s* the pattern would step
+    # over the line break and delete the first line of whatever comes next
+    # (for example the first bullet of a summary list).
+    re.compile(r"\n+In summary[,:]?[ \t]*[^\n]{0,200}\n+", re.IGNORECASE),
+    re.compile(r"\n+To summarize[,:]?[ \t]*[^\n]{0,200}\n+", re.IGNORECASE),
+    re.compile(r"\n+In conclusion[,:]?[ \t]*[^\n]{0,200}\n+", re.IGNORECASE),
+    re.compile(r"\n+To recap[,:]?[ \t]*[^\n]{0,200}\n+", re.IGNORECASE),
+    # Excessive disclaimer on simple code/math tasks (kept to a single line so
+    # a missing period cannot make it swallow following paragraphs)
+    re.compile(r"\n+Note: This (?:code|implementation|solution) (?:is|should be) (?:tested|reviewed)[^.\n]*\.\s*\n", re.IGNORECASE),
+]
+class OutputTrimmer:
+    """Strips conversational filler from an LLM response and reports the token saving."""
+    def trim(self, text: str, level: str = "standard") -> tuple[str, int]:
+        """
+        Remove structural filler from LLM output.
+        Args:
+            text: raw LLM response; returned untouched when empty or shorter
+                than 20 characters
+            level: "lite" (openings only) | "standard" (openings + closings)
+                | "aggressive" (also inline summaries/disclaimers). Any other
+                value behaves like "lite".
+        Returns:
+            (trimmed_text, tokens_saved) — tokens_saved is never negative
+        """
+        if not text or len(text) < 20:
+            return text, 0
+        original_tokens = count_tokens(text)
+        # Leading whitespace goes first so the opener check really looks at the
+        # first words of the response (the result is stripped anyway).
+        result = text.lstrip()
+        # Opening fillers: match() only succeeds at position 0, so a line in the
+        # middle of the response that happens to start with "Sure" is content
+        # and stays. Patterns run in list order, which lets "Certainly! I'd be
+        # happy to help." be peeled off in two steps.
+        for pat in _OPENING_FILLERS:
+            opener = pat.match(result)
+            if opener:
+                result = result[opener.end():]
+        if level in ("standard", "aggressive"):
+            # Closing fillers: every pattern is anchored to the end of the text,
+            # so removing one sign-off can expose another. Repeat until nothing
+            # changes; each pass that changes anything shortens the text, so
+            # the loop terminates.
+            while True:
+                before = result
+                for pat in _CLOSING_FILLERS:
+                    result = pat.sub("", result)
+                if result == before:
+                    break
+        if level == "aggressive":
+            # Inline redundancies (only on aggressive — risky otherwise)
+            for pat in _INLINE_REDUNDANCIES:
+                result = pat.sub("\n\n", result)
+        result = result.strip()
+        # Normalize multiple blank lines
+        result = re.sub(r"\n{3,}", "\n\n", result)
+        result = result.strip()
+        saved = max(0, original_tokens - count_tokens(result))
+        return result, saved

tokenmizer/compression/window.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""
+Smart Message Window — kills the biggest token drain in long sessions.
+Problem:
+  In a 50-turn session, turns 1-40 are sent verbatim EVERY single turn.
+  50 turns × avg 300 tokens/turn = 15,000 tokens repeated each time.
+  At Opus 4.8 pricing ($5/M): 15,000 × 50 turns = 750K tokens = $3.75
+  just in conversation history repetition.
+Solution:
+  Keep the last N turns verbatim (recent context).
+  Replace older turns with the graph memory context block.
+  The graph has the important information — tasks, decisions, files.
+  The LLM doesn't need the full conversation text to know what was done.
+Quality guarantee:
+  - System messages always preserved
+  - Last N non-system messages always verbatim (protect_recent, default 8)
+  - Graph context is accurate (SQLite-backed, not ephemeral)
+  - No hallucination risk: graph only contains extracted facts
+"""
+from __future__ import annotations
+import logging
+from typing import TYPE_CHECKING
+from tokenmizer.core.tokenizer import count_messages_tokens
+if TYPE_CHECKING:
+    from tokenmizer.graph_memory.graph import GraphMemory
+logger = logging.getLogger(__name__)
+class SmartMessageWindow:
+    """Replaces older conversation messages with a single graph-memory summary message."""
+    def __init__(
+        self,
+        token_budget: int = 4000,
+        protect_recent: int = 8,
+        graph_context_budget: int = 250,
+    ):
+        """
+        Args:
+            token_budget: windowing is skipped while the conversation is at or
+                below this many tokens
+            protect_recent: number of most recent non-system messages that are
+                always passed through verbatim
+            graph_context_budget: token budget handed to
+                graph.to_context_block() for the replacement summary
+        """
+        self.token_budget = token_budget
+        self.protect_recent = protect_recent
+        self.graph_context_budget = graph_context_budget
+    def apply(
+        self,
+        messages: list[dict],
+        graph: "GraphMemory",
+        model: str = "gpt-4o",
+    ) -> tuple[list[dict], int]:
+        """
+        Apply smart windowing to messages.
+        The input list is never mutated. System messages are kept (moved to the
+        front), followed by one synthetic system message carrying the graph
+        context, followed by the protected recent messages.
+        Returns:
+            (windowed_messages, tokens_saved) — the original list and 0 when
+            the conversation already fits, when there is nothing old enough to
+            drop, or when windowing would not actually reduce the token count.
+        """
+        current_tokens = count_messages_tokens(messages, model)
+        if current_tokens <= self.token_budget:
+            return messages, 0  # fits — don't touch
+        system_msgs = [m for m in messages if m.get("role") == "system"]
+        conv_msgs = [m for m in messages if m.get("role") != "system"]
+        # Index of the first message kept verbatim. Computed explicitly because
+        # slicing with -0 (protect_recent == 0) would keep the whole list and
+        # drop nothing.
+        split = len(conv_msgs) - max(0, self.protect_recent)
+        # Don't let the verbatim tail start on a tool result: chat APIs that use
+        # role "tool" expect it to follow the assistant message that issued the
+        # call, so pull the boundary back until that message is included.
+        while 0 < split < len(conv_msgs) and conv_msgs[split].get("role") == "tool":
+            split -= 1
+        if split <= 0:
+            return messages, 0  # not enough history to window
+        old = conv_msgs[:split]
+        recent = conv_msgs[split:]
+        # Build graph context to replace old turns
+        graph_ctx = graph.to_context_block(token_budget=self.graph_context_budget)
+        bridge_parts = []
+        if graph_ctx:
+            bridge_parts.append(f"[Session context from earlier conversation]\n{graph_ctx}")
+        # Add a note about what's omitted
+        bridge_parts.append(
+            f"[{len(old)} earlier messages omitted — key information preserved above]"
+        )
+        bridge_msg = {
+            "role": "system",
+            "content": "\n\n".join(bridge_parts),
+        }
+        windowed = system_msgs + [bridge_msg] + recent
+        windowed_tokens = count_messages_tokens(windowed, model)
+        saved = current_tokens - windowed_tokens
+        if saved <= 0:
+            # The bridge message costs at least as much as the messages it
+            # replaces — sending the original is both cheaper and lossless.
+            return messages, 0
+        logger.info(
+            "SmartWindow: %s old turns compressed → %s→%s tokens (saved %s)",
+            len(old), current_tokens, windowed_tokens, saved,
+        )
+        return windowed, saved
+def needs_windowing(messages: list[dict], token_budget: int, model: str = "gpt-4o") -> bool:
+    """Return True when the messages' token count for `model` is above `token_budget`."""
+    used = count_messages_tokens(messages, model)
+    over_budget = used > token_budget
+    return over_budget

tokenmizer/config/__init__.py ADDED Viewed

File without changes

tokenmizer/config/settings.py ADDED Viewed

@@ -0,0 +1,170 @@
+"""TokenMizer configuration — Pydantic Settings with env var support."""
+from __future__ import annotations
+from typing import List, Literal
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+class CompressionSettings(BaseSettings):
+    """Options for the `compression` section of Settings."""
+    # A BaseSettings subclass reads environment variables whenever it is
+    # instantiated, and Settings creates this class on its own through
+    # default_factory. Without a prefix that lookup uses the bare field names
+    # (ENABLED, ENGINE, RATIO, ...), so an unrelated variable in the process
+    # environment could change or break the config. The prefix below is the
+    # same name Settings' env_prefix + env_nested_delimiter already produce.
+    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_COMPRESSION__")
+    enabled: bool = True
+    engine: Literal["llmlingua2", "heuristic", "none"] = "heuristic"
+    # Validated to lie within [0.1, 1.0]
+    ratio: float = Field(default=0.5, ge=0.1, le=1.0)
+    min_tokens_to_compress: int = 300
+class MemorySettings(BaseSettings):
+    """Options for the `memory` section of Settings."""
+    # Scope the environment lookup to this section: as a BaseSettings subclass
+    # created via default_factory it would otherwise read bare variable names
+    # such as ENABLED from the process environment.
+    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_MEMORY__")
+    enabled: bool = True
+    max_tokens_before_summary: int = 4000
+    recent_turns_verbatim: int = 10
+class GraphCheckpointSettings(BaseSettings):
+    """Options for the `graph_checkpoint` section of Settings."""
+    # Scope the environment lookup to this section: as a BaseSettings subclass
+    # created via default_factory it would otherwise read bare variable names
+    # (ENABLED, STORAGE_DIR, ...) from the process environment.
+    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_GRAPH_CHECKPOINT__")
+    enabled: bool = True
+    # Validated to lie within [0.5, 0.99]
+    trigger_at_percent: float = Field(default=0.85, ge=0.5, le=0.99)
+    storage_dir: str = "./checkpoints"
+    max_resume_tokens: int = 400
+    use_llm_extraction: bool = False  # set True for 80%+ recall (needs API key, ~$0.001/turn)
+    extraction_model: str = ""        # leave empty = auto-pick cheapest model for your provider
+    min_confidence: float = 0.65      # minimum validation confidence threshold
+class RoutingSettings(BaseSettings):
+    """Options for the `routing` section of Settings (disabled by default)."""
+    # Scope the environment lookup to this section: as a BaseSettings subclass
+    # created via default_factory it would otherwise read bare variable names
+    # such as ENABLED from the process environment.
+    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_ROUTING__")
+    enabled: bool = False
+    simple_model: str = "claude-haiku-4-5"
+    # NOTE(review): medium and complex default to the same model — confirm
+    # that is intended rather than a placeholder.
+    medium_model: str = "claude-sonnet-4-6"
+    complex_model: str = "claude-sonnet-4-6"
+    complexity_threshold: float = 0.6
+class CacheSettings(BaseSettings):
+    """Options for the `cache` section of Settings."""
+    # Scope the environment lookup to this section: as a BaseSettings subclass
+    # created via default_factory it would otherwise read bare variable names
+    # (ENABLED, MAX_SIZE, ...) from the process environment.
+    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_CACHE__")
+    enabled: bool = True
+    similarity_threshold: float = 0.92
+    ttl_seconds: int = 3600
+    max_size: int = 10_000
+class TerseOutputSettings(BaseSettings):
+    """Options for the `terse_output` section of Settings."""
+    # Scope the environment lookup to this section: as a BaseSettings subclass
+    # created via default_factory it would otherwise read bare variable names
+    # (ENABLED, LEVEL) from the process environment — and an unrelated LEVEL
+    # value would fail the Literal validation below at startup.
+    model_config = SettingsConfigDict(env_prefix="TOKENMIZER_TERSE_OUTPUT__")
+    enabled: bool = True
+    level: Literal["lite", "full", "ultra"] = "full"
+class Settings(BaseSettings):
+    """Top-level TokenMizer configuration.
+    Values come from constructor arguments (e.g. a parsed YAML file), from
+    TOKENMIZER_-prefixed environment variables (nested sections use "__",
+    e.g. TOKENMIZER_CACHE__TTL_SECONDS) and from a local .env file. Unknown
+    top-level keys are ignored (extra="ignore").
+    """
+    model_config = SettingsConfigDict(
+        env_prefix="TOKENMIZER_",
+        env_nested_delimiter="__",
+        env_file=".env",
+        extra="ignore",
+    )
+    # Provider — synced exactly with providers/registry.py
+    provider: Literal[
+        "anthropic", "claude",
+        "openai", "gpt",
+        "deepseek",
+        "mistral",
+        "grok",
+        "cohere",
+        "gemini",
+        "ollama",
+        "openrouter",
+    ] = "anthropic"
+    default_model: str = "claude-sonnet-4-6"
+    # API keys (prefer env vars over config file)
+    anthropic_api_key: str = ""
+    openai_api_key: str = ""
+    gemini_api_key: str = ""
+    grok_api_key: str = ""
+    deepseek_api_key: str = ""
+    mistral_api_key: str = ""
+    cohere_api_key: str = ""
+    openrouter_api_key: str = ""
+    # State backend
+    state_backend: Literal["memory", "redis"] = "memory"
+    redis_url: str = "redis://localhost:6379/0"
+    # Auth
+    api_key: str = ""  # TOKENMIZER_API_KEY — empty = dev mode (no auth)
+    # CORS
+    cors_origins: List[str] = ["http://localhost:3000", "http://localhost:8000"]
+    # Sub-configs
+    compression: CompressionSettings = Field(default_factory=CompressionSettings)
+    memory: MemorySettings = Field(default_factory=MemorySettings)
+    graph_checkpoint: GraphCheckpointSettings = Field(default_factory=GraphCheckpointSettings)
+    routing: RoutingSettings = Field(default_factory=RoutingSettings)
+    cache: CacheSettings = Field(default_factory=CacheSettings)
+    terse_output: TerseOutputSettings = Field(default_factory=TerseOutputSettings)
+    # Server
+    # NOTE(review): 0.0.0.0 listens on every interface while the default
+    # api_key above means "no auth" — make sure deployments override one of them.
+    proxy_host: str = "0.0.0.0"
+    proxy_port: int = 8000
+    def get_api_key_for_provider(self, provider: str) -> str:
+        """Return the configured API key for `provider`.
+        Aliases share a key ("claude" → anthropic, "gpt" → openai). "ollama"
+        and any unrecognised provider name yield an empty string.
+        """
+        mapping = {
+            "anthropic": self.anthropic_api_key,
+            "claude": self.anthropic_api_key,
+            "openai": self.openai_api_key,
+            "gpt": self.openai_api_key,
+            "gemini": self.gemini_api_key,
+            "grok": self.grok_api_key,
+            "deepseek": self.deepseek_api_key,
+            "mistral": self.mistral_api_key,
+            "cohere": self.cohere_api_key,
+            "openrouter": self.openrouter_api_key,
+            "ollama": "",
+        }
+        return mapping.get(provider, "")
+    @classmethod
+    def from_yaml(cls, path: str) -> "Settings":
+        """Build Settings from a YAML file whose top level is a mapping.
+        An empty file is treated as "no overrides".
+        Raises:
+            OSError: the file cannot be opened
+            yaml.YAMLError: the file is not valid YAML
+            ValueError: the document's top level is not a mapping
+            pydantic.ValidationError: a value fails field validation
+        """
+        import yaml
+        # Explicit encoding: the platform default is not UTF-8 everywhere.
+        with open(path, encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+        if data is None:
+            data = {}
+        # Reject lists/scalars explicitly. A bare `or {}` would quietly turn a
+        # falsy non-mapping document (e.g. `[]`) into "use all defaults".
+        if not isinstance(data, dict):
+            raise ValueError(
+                f"{path}: top-level YAML value must be a mapping, "
+                f"got {type(data).__name__}"
+            )
+        return cls(**data)
+_settings: Settings | None = None
+def get_settings() -> Settings:
+    """Return the process-wide Settings instance, creating it on first use.
+    The YAML path comes from the TOKENMIZER_CONFIG environment variable and
+    defaults to "tokenmizer.yaml". A missing file means plain Settings(); a
+    file that exists but cannot be loaded is reported at error level before
+    falling back to Settings().
+    """
+    global _settings
+    if _settings is not None:
+        return _settings
+    import logging
+    import os
+    logger = logging.getLogger(__name__)
+    yaml_path = os.environ.get("TOKENMIZER_CONFIG", "tokenmizer.yaml")
+    if not os.path.exists(yaml_path):
+        _settings = Settings()
+        return _settings
+    try:
+        _settings = Settings.from_yaml(yaml_path)
+    except Exception as e:
+        # FIXED: an unreadable config used to be discarded silently. The
+        # fallback defaults are dev-mode-permissive (no API key required,
+        # default CORS origins, in-memory state backend), so an operator's
+        # security-relevant settings such as `api_key` or `cors_origins`
+        # could end up not applied without any indication. Logging at
+        # `error` makes a typo in the YAML visible at startup rather than
+        # later as "why does this accept unauthenticated requests?".
+        logger.error(
+            f"Failed to load config from {yaml_path}: {e}. "
+            "Falling back to hardcoded defaults — this means any "
+            "settings in your YAML file (including api_key, "
+            "cors_origins, state_backend) are NOT applied. Fix the "
+            "YAML file and restart."
+        )
+        _settings = Settings()
+    return _settings

tokenmizer/core/__init__.py ADDED Viewed

File without changes

tokenmizer/core/dto.py ADDED Viewed

@@ -0,0 +1,196 @@
+"""
+DTOs — typed data transfer objects for every layer boundary.
+Rule: no raw dict crosses a layer boundary.
+Each module owns its output DTO. Callers unpack what they need.
+tokenmizer/core/dto.py
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+# ── Graph layer ───────────────────────────────────────────────────────────────
+@dataclass(frozen=True)
+class GraphNodeDTO:
+    """Immutable record describing a single graph-memory node."""
+    id: str
+    type: str
+    label: str
+    status: str
+    summary: str
+    # NOTE(review): value ranges of importance/confidence are not visible
+    # here — presumably 0–1; confirm against the graph module.
+    importance: float
+    confidence: float
+    age_days: float
+@dataclass(frozen=True)
+class GraphEdgeDTO:
+    """Immutable record describing one edge between two graph nodes, identified by id."""
+    source_id: str
+    target_id: str
+    type: str
+    weight: float
+@dataclass(frozen=True)
+class GraphStatsDTO:
+    """Immutable per-session graph statistics, including two health indicators.
+    The dict fields make instances unhashable even though the class is frozen.
+    """
+    session_id: str
+    node_count: int
+    edge_count: int
+    # presumably counts keyed by node type / node status — verify against producer
+    by_type: dict
+    by_status: dict
+    processed_messages: int
+    avg_confidence: float
+    # FIXED: previously decision-contradiction-check failures (the logic
+    # that tracks "Changed X → Y" in resume context) were swallowed at
+    # debug level with zero visibility. Non-zero here means that feature
+    # is degraded even though node creation itself kept working fine.
+    decision_tracking_failures: int = 0
+    # True if SQLite could not be initialized for this session — the graph
+    # is running in-memory only, with NO durable persistence whatsoever.
+    # A restart will lose everything. This used to only ever appear in logs.
+    persistence_broken: bool = False
+# ── Checkpoint layer ──────────────────────────────────────────────────────────
+@dataclass(frozen=True)
+class CheckpointSummaryDTO:
+    """Lightweight — for list endpoints."""
+    checkpoint_id: str
+    session_id: str
+    # NOTE(review): a float timestamp — presumably Unix epoch seconds; confirm
+    created_at: float
+    context_pct: float
+    trigger: str
+    message_count: int
+    resume_tokens: int
+@dataclass(frozen=True)
+class ResumeDTO:
+    """Immutable record carrying the resume context text for a session checkpoint."""
+    session_id: str
+    checkpoint_id: str
+    level: str
+    resume_context: str
+    # presumably the token count of resume_context — verify against producer
+    token_count: int
+    node_count: int
+# ── Provider layer ────────────────────────────────────────────────────────────
+@dataclass
+class LLMResponseDTO:
+    """Mutable record of one provider response: text, token usage and metadata."""
+    text: str
+    input_tokens: int
+    output_tokens: int
+    model: str
+    provider: str
+    latency_ms: float = 0.0
+    finish_reason: str = "stop"
+    cached: bool = False
+    cost_usd: float = 0.0
+    @property
+    def total_tokens(self) -> int:
+        """Input tokens plus output tokens."""
+        prompt_side = self.input_tokens
+        completion_side = self.output_tokens
+        return prompt_side + completion_side
+# ── Compression layer ─────────────────────────────────────────────────────────
+@dataclass(frozen=True)
+class CompressionResultDTO:
+    """Immutable outcome of one compression attempt, with before/after token counts."""
+    original_tokens: int
+    compressed_tokens: int
+    tokens_saved: int
+    quality_score: float          # 0–1; if < threshold, original was used
+    strategy_used: str
+    was_compressed: bool
+@dataclass(frozen=True)
+class OutputTrimResultDTO:
+    """Immutable outcome of trimming an LLM response: token counts plus the text."""
+    original_tokens: int
+    trimmed_tokens: int
+    tokens_saved: int
+    # presumably the trimmed text rather than the original — verify against producer
+    text: str
+# ── Cache layer ───────────────────────────────────────────────────────────────
+@dataclass(frozen=True)
+class CacheStatsDTO:
+    """Immutable snapshot of cache size, eviction and hit/miss counters."""
+    entries: int
+    max_size: int
+    utilization_pct: float
+    evictions: int
+    # NOTE(review): unit not visible here (fraction vs percent) — confirm
+    hit_rate: float
+    hit_exact: int
+    hit_semantic: int
+    miss: int
+    semantic_available: bool
+# ── File intelligence layer ───────────────────────────────────────────────────
+@dataclass(frozen=True)
+class FileExtractionDTO:
+    """Immutable result of extracting content from one file, with size and token accounting."""
+    file_type: str
+    filename: str
+    original_size_bytes: int
+    original_tokens: int
+    extracted_tokens: int
+    tokens_saved: int
+    savings_pct: float
+    content: str
+    summary: str
+    strategy_used: str
+    was_truncated: bool
+# ── Analytics layer ───────────────────────────────────────────────────────────
+@dataclass(frozen=True)
+class PeriodStatsDTO:
+    """Immutable savings totals for one reporting period."""
+    period: str                    # "daily" | "weekly" | "monthly"
+    requests: int
+    tokens_saved: int
+    savings_pct: float
+    cost_saved_usd: float
+@dataclass(frozen=True)
+class AnalyticsSummaryDTO:
+    """Immutable analytics roll-up: one PeriodStatsDTO per period plus breakdowns.
+    The dict and list fields make instances unhashable even though the class
+    is frozen.
+    """
+    total_requests: int
+    daily: PeriodStatsDTO
+    weekly: PeriodStatsDTO
+    monthly: PeriodStatsDTO
+    # schema of the two breakdown dicts is not visible here — verify against producer
+    layer_breakdown: dict
+    by_provider: dict
+    suggestions: list[str]
+# ── Chat API layer ─────────────────────────────────────────────────────────────
+@dataclass
+class ChatSavingsDTO:
+    """Mutable per-request tally of saved tokens, one counter per savings layer."""
+    file_extraction: int = 0
+    compression: int = 0
+    output_trim: int = 0
+    cache: int = 0
+    windowing: int = 0
+    routing: int = 0
+    @property
+    def total(self) -> int:
+        """Sum of all six counters."""
+        counters = (
+            self.file_extraction,
+            self.compression,
+            self.output_trim,
+            self.cache,
+            self.windowing,
+            self.routing,
+        )
+        return sum(counters)
+    def to_dict(self) -> dict:
+        """Return the six counters as a plain dict, in declaration order (total is not included)."""
+        layer_names = (
+            "file_extraction",
+            "compression",
+            "output_trim",
+            "cache",
+            "windowing",
+            "routing",
+        )
+        return {layer: getattr(self, layer) for layer in layer_names}

tokenmizer/core/errors.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""Shared error types for TokenMizer."""
+from __future__ import annotations
+class TokenMizerError(Exception):
+    """Base exception.
+    Root of the TokenMizer error hierarchy; the other error types in this
+    module derive from it, so catching it covers all of them.
+    """
+class ProviderError(TokenMizerError):
+    """Error raised for a failed provider call.
+    Attributes:
+        provider: provider name, as passed by the raiser
+        error_type: short classification string, as passed by the raiser
+        message: the raw message, without the "[provider] error_type:" prefix
+        retryable: whether the caller may retry the request
+        retry_after: delay hint supplied by the raiser (0.0 when none);
+            presumably seconds — confirm against the raising code
+    str(error) is "[<provider>] <error_type>: <message>".
+    """
+    def __init__(self, provider: str, error_type: str, message: str,
+                 retryable: bool = False, retry_after: float = 0.0):
+        self.provider = provider
+        self.error_type = error_type
+        # Kept so handlers can read the raw text without parsing str(self).
+        self.message = message
+        self.retryable = retryable
+        self.retry_after = retry_after
+        super().__init__(f"[{provider}] {error_type}: {message}")
+    def __reduce__(self):
+        # Default exception pickling/copying re-calls the class with self.args,
+        # which here is the single formatted string and does not satisfy the
+        # three required parameters of __init__. Rebuild from the original
+        # arguments instead so the error survives pickle and copy.
+        return (
+            type(self),
+            (self.provider, self.error_type, self.message,
+             self.retryable, self.retry_after),
+        )
+class ConfigError(TokenMizerError):
+    """Invalid configuration.
+    Carries no extra fields beyond the standard exception message.
+    """
+class StorageError(TokenMizerError):
+    """Persistence failure.
+    Base class for the more specific checkpoint and graph persistence errors
+    in this module.
+    """
+class CheckpointPersistError(StorageError):
+    """Checkpoint write failed. Callers MUST NOT treat a checkpoint as
+    successfully created if this is raised — there is no fallback write
+    path, so a swallowed instance of this error means data loss.
+    Being a StorageError, it is also caught by handlers for that base class."""
+class GraphPersistError(StorageError):
+    """Graph (node/edge) write failed. Same data-loss caveat as
+    CheckpointPersistError — see that class's docstring.
+    Being a StorageError, it is also caught by handlers for that base class."""