PyPI - tsave - Versions diffs - 0.1.1__py3-none-any.whl - Mend

tsave 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

token_saver/__init__.py +3 -0
token_saver/cli.py +32 -0
token_saver/client.py +160 -0
token_saver/core/__init__.py +0 -0
token_saver/core/analyzer.py +167 -0
token_saver/core/compressor.py +184 -0
token_saver/core/static_analyzer.py +273 -0
token_saver/core/tokenizer.py +130 -0
tsave-0.1.1.dist-info/METADATA +9 -0
tsave-0.1.1.dist-info/RECORD +13 -0
tsave-0.1.1.dist-info/WHEEL +4 -0
tsave-0.1.1.dist-info/entry_points.txt +2 -0
tsave-0.1.1.dist-info/licenses/LICENSE +21 -0

token_saver/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .client import TokenSaverClient
+__all__ = ["TokenSaverClient"]

token_saver/cli.py ADDED Viewed

@@ -0,0 +1,32 @@
+"""CLI entry point: tsave scan <file.py>"""
+import sys
+from pathlib import Path
+from .core.static_analyzer import scan_file
+def main():
+    """Run ``tsave scan`` over the files named in ``sys.argv`` and exit.
+    Prints one report per scanned file. Exits with status 1 when the
+    arguments are malformed, when any file yields findings, or when any
+    file is missing or cannot be read; exits with status 0 otherwise.
+    """
+    if len(sys.argv) < 3 or sys.argv[1] != "scan":
+        print("Usage: tsave scan <file.py> [file2.py ...]")
+        sys.exit(1)
+    files = sys.argv[2:]
+    total_findings = 0
+    had_error = False
+    for f in files:
+        p = Path(f)
+        if not p.exists():
+            print(f"tsave: {f} -- file not found", file=sys.stderr)
+            # A skipped file must not look like a clean scan to the caller.
+            had_error = True
+            continue
+        try:
+            report = scan_file(p)
+        except (OSError, UnicodeDecodeError) as exc:
+            # Directories, permission problems and undecodable files should
+            # not abort the remaining scans with a traceback.
+            print(f"tsave: {f} -- could not read file ({exc})", file=sys.stderr)
+            had_error = True
+            continue
+        print(report.format())
+        total_findings += len(report.findings)
+        if len(files) > 1:
+            print()
+    sys.exit(1 if total_findings > 0 or had_error else 0)
+if __name__ == "__main__":
+    main()

token_saver/client.py ADDED Viewed

@@ -0,0 +1,160 @@
+"""TokenSaverClient — drop-in replacement for anthropic.Anthropic with built-in cost tracking."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+import anthropic
+from .core.tokenizer import (
+    CACHE_READ_DISCOUNT,
+    CACHE_WRITE_MULTIPLIER,
+    PRICING,
+    CostEstimate,
+    TokenCount,
+    count_tokens,
+    estimate_cost,
+    monthly_projection,
+)
+from .core.analyzer import AnalysisReport, analyze
+from .core.compressor import CompressedResult, compress
+@dataclass
+class UsageRecord:
+    """Token usage of one completed request, priced through ``PRICING``.
+    Models missing from ``PRICING`` fall back to the (3.00, 15.00) rates.
+    """
+    model: str
+    # Input tokens reported by the API for the request.
+    input_tokens: int
+    output_tokens: int
+    # Tokens served from / written to the prompt cache, as reported in usage.
+    cache_read_tokens: int = 0
+    cache_creation_tokens: int = 0
+    @property
+    def input_cost(self) -> float:
+        """Cost of all input-side tokens, including cache reads and cache writes."""
+        rate_in, _ = PRICING.get(self.model, (3.00, 15.00))
+        # Cached tokens are billed at a fraction / multiple of the base input
+        # rate; with no cache activity this reduces to input_tokens * rate_in.
+        billable = (
+            self.input_tokens
+            + self.cache_read_tokens * CACHE_READ_DISCOUNT
+            + self.cache_creation_tokens * CACHE_WRITE_MULTIPLIER
+        )
+        return billable * rate_in / 1_000_000
+    @property
+    def output_cost(self) -> float:
+        """Cost of the generated output tokens."""
+        _, rate_out = PRICING.get(self.model, (3.00, 15.00))
+        return self.output_tokens * rate_out / 1_000_000
+    @property
+    def total_cost(self) -> float:
+        """Sum of ``input_cost`` and ``output_cost``."""
+        return self.input_cost + self.output_cost
+class TokenSaverClient:
+    """Wraps anthropic.Anthropic with token counting, cost tracking, analysis, and compression."""
+    def __init__(self, **kwargs):
+        """Create the wrapped client; all keyword arguments go to ``anthropic.Anthropic``."""
+        self._client = anthropic.Anthropic(**kwargs)
+        self._history: list[UsageRecord] = []
+    @property
+    def raw(self) -> anthropic.Anthropic:
+        """The underlying ``anthropic.Anthropic`` instance."""
+        return self._client
+    @property
+    def history(self) -> list[UsageRecord]:
+        """A copy of the tracked usage records, oldest first."""
+        return list(self._history)
+    @property
+    def total_cost(self) -> float:
+        """Summed ``total_cost`` of every tracked request."""
+        return sum(r.total_cost for r in self._history)
+    @property
+    def total_input_tokens(self) -> int:
+        """Summed ``input_tokens`` of every tracked request."""
+        return sum(r.input_tokens for r in self._history)
+    @property
+    def total_output_tokens(self) -> int:
+        """Summed ``output_tokens`` of every tracked request."""
+        return sum(r.output_tokens for r in self._history)
+    def create(self, **kwargs) -> anthropic.types.Message:
+        """Forward to ``messages.create`` and record the usage of the response.
+        Responses that carry no ``usage`` attribute (for example the stream
+        object returned for ``stream=True``) are returned untracked instead
+        of raising after the request has already been sent.
+        """
+        response = self._client.messages.create(**kwargs)
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            return response
+        record = UsageRecord(
+            model=kwargs.get("model", response.model),
+            input_tokens=usage.input_tokens,
+            output_tokens=usage.output_tokens,
+            # Older responses may lack the cache fields or report them as None.
+            cache_read_tokens=getattr(usage, "cache_read_input_tokens", 0) or 0,
+            cache_creation_tokens=getattr(usage, "cache_creation_input_tokens", 0) or 0,
+        )
+        self._history.append(record)
+        return response
+    def count_tokens(
+        self,
+        *,
+        model: str,
+        messages: list[dict],
+        system: str | list[dict] | None = None,
+        tools: list[dict] | None = None,
+    ) -> TokenCount:
+        """Count input tokens for a request using the wrapped client."""
+        return count_tokens(self._client, model=model, messages=messages, system=system, tools=tools)
+    def estimate_cost(
+        self,
+        *,
+        model: str,
+        messages: list[dict],
+        estimated_output_tokens: int = 1000,
+        system: str | list[dict] | None = None,
+        tools: list[dict] | None = None,
+    ) -> CostEstimate:
+        """Estimate the cost of a request from its counted input and an assumed output size."""
+        return estimate_cost(
+            self._client,
+            model=model,
+            messages=messages,
+            estimated_output_tokens=estimated_output_tokens,
+            system=system,
+            tools=tools,
+        )
+    def analyze(
+        self,
+        *,
+        model: str,
+        messages: list[dict],
+        system: str | list[dict] | None = None,
+        tools: list[dict] | None = None,
+    ) -> AnalysisReport:
+        """Run the pre-send analysis for a request using the wrapped client."""
+        return analyze(self._client, model=model, messages=messages, system=system, tools=tools)
+    def compress(
+        self,
+        *,
+        model: str,
+        messages: list[dict],
+        target_reduction: float = 0.5,
+        query: str | None = None,
+        keep_last_n: int = 4,
+    ) -> CompressedResult:
+        """Compress a conversation history using the wrapped client."""
+        return compress(
+            self._client,
+            model=model,
+            messages=messages,
+            target_reduction=target_reduction,
+            query=query,
+            keep_last_n=keep_last_n,
+        )
+    def monthly_projection(self, requests_per_day: int, days: int = 30):
+        """Project spend from the average tracked request cost (0.0 when nothing is tracked)."""
+        if not self._history:
+            return monthly_projection(0.0, requests_per_day, days)
+        avg_cost = self.total_cost / len(self._history)
+        return monthly_projection(avg_cost, requests_per_day, days)
+    def usage_summary(self) -> str:
+        """Return a multi-line text summary of all tracked requests."""
+        n = len(self._history)
+        if n == 0:
+            return "No requests tracked yet."
+        lines = [
+            f"=== Usage Summary ({n} requests) ===",
+            f"Total input tokens:  {self.total_input_tokens:,}",
+            f"Total output tokens: {self.total_output_tokens:,}",
+            f"Total cost:          ${self.total_cost:.4f}",
+            f"Avg cost/request:    ${self.total_cost / n:.4f}",
+        ]
+        models_used = {r.model for r in self._history}
+        # The model line is only shown when more than one model was used.
+        if len(models_used) > 1:
+            lines.append(f"Models used: {', '.join(sorted(models_used))}")
+        return "\n".join(lines)

token_saver/core/__init__.py ADDED Viewed

File without changes

token_saver/core/analyzer.py ADDED Viewed

@@ -0,0 +1,167 @@
+"""Pre-send prescriptive analysis with optimization suggestions."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+import anthropic
+from .tokenizer import PRICING, count_tokens
+@dataclass
+class Suggestion:
+    """One optimization hint produced by the pre-send checks."""
+    # Short tag for the kind of problem, e.g. "large-message" or "no-caching".
+    category: str
+    # Human-readable description of the problem and the recommended action.
+    message: str
+    # Rough estimate of the achievable saving, in percent.
+    estimated_saving_pct: float = 0.0
+@dataclass
+class AnalysisReport:
+    """Result of a pre-send analysis: token count, suggestions and cheaper models."""
+    model: str
+    input_tokens: int
+    suggestions: list[Suggestion] = field(default_factory=list)
+    alternative_models: list[dict] = field(default_factory=list)
+    @property
+    def input_cost(self) -> float:
+        """Input cost for ``model``; unknown models use the (3.00, 15.00) rates."""
+        per_million = PRICING.get(self.model, (3.00, 15.00))[0]
+        return self.input_tokens * per_million / 1_000_000
+    @property
+    def potential_savings_pct(self) -> float:
+        """Largest single estimated saving among the suggestions, or 0.0 if none."""
+        return max((hint.estimated_saving_pct for hint in self.suggestions), default=0.0)
+    def format(self) -> str:
+        """Render the report as multi-line text."""
+        out = [
+            "=== Analysis Report ===",
+            f"Model: {self.model}",
+            f"Input tokens: {self.input_tokens:,}",
+            f"Estimated input cost: ${self.input_cost:.4f}",
+        ]
+        if not self.suggestions:
+            out.append("\nNo optimization suggestions -- looks good!")
+        else:
+            out.append(f"\nSuggestions ({len(self.suggestions)}):")
+            for idx, hint in enumerate(self.suggestions, 1):
+                suffix = ""
+                if hint.estimated_saving_pct:
+                    suffix = f" (~{hint.estimated_saving_pct:.0f}% saving)"
+                out.append(f"  {idx}. [{hint.category}] {hint.message}{suffix}")
+        if self.alternative_models:
+            out.append("\nAlternative models:")
+            out.extend(
+                f"  - {alt['model']}: ${alt['cost']:.4f} (save ${alt['saving']:.4f})"
+                for alt in self.alternative_models
+            )
+        return "\n".join(out)
+def _check_message_length(messages: list[dict]) -> list[Suggestion]:
+    """Flag every message whose text exceeds 50,000 characters.
+    Content may be a plain string or a list of content blocks; for a list,
+    only the ``text`` of blocks with ``type == "text"`` is measured. Messages
+    with any other content shape are skipped.
+    """
+    suggestions = []
+    for i, msg in enumerate(messages):
+        content = msg.get("content", "")
+        if isinstance(content, str):
+            size = len(content)
+        elif isinstance(content, list):
+            size = sum(
+                len(block["text"])
+                for block in content
+                if isinstance(block, dict)
+                and block.get("type") == "text"
+                and isinstance(block.get("text"), str)
+            )
+        else:
+            continue
+        if size > 50_000:
+            suggestions.append(Suggestion(
+                category="large-message",
+                message=f"Message {i} has {size:,} chars — consider compressing or chunking",
+                estimated_saving_pct=30.0,
+            ))
+    return suggestions
+def _check_system_prompt(system: str | list[dict] | None) -> list[Suggestion]:
+    """Suggest trimming or caching when the system prompt exceeds 10,000 characters."""
+    if system is None:
+        return []
+    if isinstance(system, str):
+        prompt_text = system
+    else:
+        # Block form: join the "text" of every dict block with single spaces.
+        prompt_text = " ".join(
+            block.get("text", "") for block in system if isinstance(block, dict)
+        )
+    if len(prompt_text) <= 10_000:
+        return []
+    return [Suggestion(
+        category="large-system-prompt",
+        message=f"System prompt is {len(prompt_text):,} chars — consider trimming or using caching",
+        estimated_saving_pct=20.0,
+    )]
+def _check_redundant_turns(messages: list[dict]) -> list[Suggestion]:
+    """Suggest summarizing older turns once a conversation exceeds 20 messages."""
+    turn_count = len(messages)
+    if turn_count <= 20:
+        return []
+    return [Suggestion(
+        category="long-conversation",
+        message=f"Conversation has {turn_count} turns — consider summarizing older turns",
+        estimated_saving_pct=40.0,
+    )]
+def _check_caching(system: str | list[dict] | None, tools: list[dict] | None) -> list[Suggestion]:
+    """Suggest prompt caching for a large system prompt or many tools without cache_control.
+    A request counts as cached when any system block or any tool dict has a
+    "cache_control" key. It is flagged when uncached and either the system
+    text exceeds 2048 characters or more than 3 tools are supplied.
+    """
+    system_blocks = system if isinstance(system, list) else []
+    cached = any(isinstance(block, dict) and "cache_control" in block for block in system_blocks)
+    if not cached:
+        cached = any(isinstance(tool, dict) and "cache_control" in tool for tool in (tools or []))
+    if isinstance(system, str):
+        prompt_text = system
+    else:
+        prompt_text = " ".join(
+            block.get("text", "") for block in system_blocks if isinstance(block, dict)
+        )
+    many_tools = bool(tools) and len(tools) > 3
+    if cached or not (len(prompt_text) > 2048 or many_tools):
+        return []
+    return [Suggestion(
+        category="no-caching",
+        message="Large system prompt or many tools without cache_control — enable prompt caching for 90% input cost reduction on cache hits",
+        estimated_saving_pct=50.0,
+    )]
+def _find_cheaper_models(model: str, input_tokens: int) -> list[dict]:
+    """List models with a lower input rate than ``model``, cheapest first.
+    Each entry has "model", "cost" (input cost for ``input_tokens``) and
+    "saving" (difference from the current model's input cost).
+    """
+    current_rate = PRICING.get(model, (3.00, 15.00))[0]
+    current_cost = input_tokens * current_rate / 1_000_000
+    by_input_rate = sorted(PRICING.items(), key=lambda entry: entry[1][0])
+    cheaper = []
+    for name, rates in by_input_rate:
+        rate = rates[0]
+        if name == model or rate >= current_rate:
+            continue
+        cost = input_tokens * rate / 1_000_000
+        cheaper.append({"model": name, "cost": cost, "saving": current_cost - cost})
+    return cheaper
+def analyze(
+    client: anthropic.Anthropic,
+    *,
+    model: str,
+    messages: list[dict],
+    system: str | list[dict] | None = None,
+    tools: list[dict] | None = None,
+) -> AnalysisReport:
+    """Count the request's input tokens, run every check and assemble the report."""
+    counted = count_tokens(client, model=model, messages=messages, system=system, tools=tools)
+    # Checks run in a fixed order so the suggestion list is stable.
+    hints: list[Suggestion] = [
+        *_check_message_length(messages),
+        *_check_system_prompt(system),
+        *_check_redundant_turns(messages),
+        *_check_caching(system, tools),
+    ]
+    return AnalysisReport(
+        model=model,
+        input_tokens=counted.input_tokens,
+        suggestions=hints,
+        alternative_models=_find_cheaper_models(model, counted.input_tokens),
+    )

token_saver/core/compressor.py ADDED Viewed

@@ -0,0 +1,184 @@
+"""Semantic compressor with relevance scoring for conversation history."""
+from __future__ import annotations
+from dataclasses import dataclass
+import anthropic
+@dataclass
+class CompressedResult:
+    """Before/after messages and token counts of one compression run."""
+    original_messages: list[dict]
+    compressed_messages: list[dict]
+    original_tokens: int
+    compressed_tokens: int
+    @property
+    def reduction_pct(self) -> float:
+        """Percentage of tokens removed; 0.0 when the original had no tokens."""
+        before = self.original_tokens
+        if before == 0:
+            return 0.0
+        return (1 - self.compressed_tokens / before) * 100
+    def format(self) -> str:
+        """Render a three-line summary of the compression."""
+        rows = [
+            f"Original:   {self.original_tokens:,} tokens ({len(self.original_messages)} messages)",
+            f"Compressed: {self.compressed_tokens:,} tokens ({len(self.compressed_messages)} messages)",
+            f"Reduction:  {self.reduction_pct:.1f}%",
+        ]
+        return "\n".join(rows)
+def _score_message_relevance(message: dict, query: str | None) -> float:
+    """Score a message between 0 and 1 for how worth keeping it is.
+    Starts at 0.5, adds 0.1 for assistant messages, 0.2 when the content holds
+    a tool_use/tool_result block, and up to 0.3 for word overlap with
+    ``query``. System messages and non-text content always score 1.0.
+    """
+    raw = message.get("content", "")
+    text = raw
+    if isinstance(raw, list):
+        text = " ".join(
+            part.get("text", "") for part in raw
+            if isinstance(part, dict) and part.get("type") == "text"
+        )
+    if not isinstance(text, str):
+        return 1.0
+    role = message.get("role", "")
+    score = 0.5
+    if role == "assistant":
+        score += 0.1
+    if role == "system":
+        return 1.0
+    if isinstance(raw, list) and any(
+        isinstance(part, dict) and part.get("type") in ("tool_use", "tool_result")
+        for part in raw
+    ):
+        score += 0.2
+    if query:
+        wanted = set(query.lower().split())
+        if wanted:
+            present = set(text.lower().split())
+            score += 0.3 * (len(wanted & present) / len(wanted))
+    return min(score, 1.0)
+def _summarize_messages(
+    client: anthropic.Anthropic,
+    messages_to_summarize: list[dict],
+    model: str,
+) -> str:
+    """Ask the model for a 2-3 sentence summary of the given messages.
+    Only string content and the text blocks of list content are included in
+    the prompt. Returns "" without calling the API when there is no text to
+    summarize, and "" when the reply holds no text block.
+    """
+    conversation_text = []
+    for msg in messages_to_summarize:
+        role = msg.get("role", "unknown")
+        content = msg.get("content", "")
+        if isinstance(content, list):
+            content = " ".join(
+                block.get("text", "") for block in content
+                if isinstance(block, dict) and block.get("type") == "text"
+            )
+        if isinstance(content, str) and content.strip():
+            conversation_text.append(f"[{role}]: {content}")
+    if not conversation_text:
+        return ""
+    joined = "\n".join(conversation_text)
+    resp = client.messages.create(
+        model=model,
+        max_tokens=512,
+        messages=[{
+            "role": "user",
+            "content": (
+                "Compress this conversation into 2-3 sentences max. "
+                "Keep only facts, decisions, and key terms. No filler.\n\n"
+                f"{joined}"
+            ),
+        }],
+    )
+    # The reply may be empty or start with a non-text block, so take the first
+    # block that actually carries text instead of indexing content[0] blindly.
+    for block in resp.content or []:
+        text = getattr(block, "text", None)
+        if isinstance(text, str):
+            return text
+    return ""
+def compress(
+    client: anthropic.Anthropic,
+    *,
+    model: str,
+    messages: list[dict],
+    target_reduction: float = 0.5,
+    query: str | None = None,
+    keep_last_n: int = 4,
+) -> CompressedResult:
+    """Replace all but the last ``keep_last_n`` messages with a model-written summary.
+    The older messages are summarized via ``_summarize_messages`` and
+    re-inserted as a "[Prior context]" user turn followed by an "Understood."
+    assistant turn. Token counts before and after come from
+    ``client.messages.count_tokens``.
+    NOTE(review): ``target_reduction`` and ``query`` are accepted but not
+    referenced anywhere in this body.
+    """
+    # Nothing to compress: the whole conversation fits in the protected tail.
+    if len(messages) <= keep_last_n:
+        tc = client.messages.count_tokens(model=model, messages=messages)
+        return CompressedResult(
+            original_messages=messages,
+            compressed_messages=list(messages),
+            original_tokens=tc.input_tokens,
+            compressed_tokens=tc.input_tokens,
+        )
+    original_tc = client.messages.count_tokens(model=model, messages=messages)
+    # With keep_last_n == 0 the slice [-0:] is the whole list and candidates is empty.
+    protected = messages[-keep_last_n:]
+    candidates = messages[:-keep_last_n]
+    if not candidates:
+        compressed_tc = client.messages.count_tokens(model=model, messages=protected)
+        return CompressedResult(
+            original_messages=messages,
+            compressed_messages=list(protected),
+            original_tokens=original_tc.input_tokens,
+            compressed_tokens=compressed_tc.input_tokens,
+        )
+    summary = _summarize_messages(client, candidates, model)
+    compressed_messages = []
+    # An empty summary drops the older messages without adding a replacement turn.
+    if summary:
+        compressed_messages.append({
+            "role": "user",
+            "content": f"[Prior context] {summary}",
+        })
+        compressed_messages.append({
+            "role": "assistant",
+            "content": "Understood.",
+        })
+    compressed_messages.extend(protected)
+    if not compressed_messages:
+        compressed_messages = list(protected)
+    # Ensure the result opens with a user turn by prepending a placeholder if needed.
+    if compressed_messages and compressed_messages[0].get("role") != "user":
+        compressed_messages.insert(0, {
+            "role": "user",
+            "content": "[Conversation continues from earlier context]",
+        })
+    # Merge consecutive same-role turns when both contents are plain strings;
+    # non-string contents are left as separate consecutive messages.
+    final = []
+    prev_role = None
+    for msg in compressed_messages:
+        role = msg.get("role")
+        if role == prev_role and role in ("user", "assistant"):
+            existing = final[-1].get("content", "")
+            new_content = msg.get("content", "")
+            if isinstance(existing, str) and isinstance(new_content, str):
+                final[-1] = {**final[-1], "content": f"{existing}\n\n{new_content}"}
+            else:
+                final.append(msg)
+        else:
+            final.append(msg)
+        prev_role = role
+    compressed_tc = client.messages.count_tokens(model=model, messages=final)
+    return CompressedResult(
+        original_messages=messages,
+        compressed_messages=final,
+        original_tokens=original_tc.input_tokens,
+        compressed_tokens=compressed_tc.input_tokens,
+    )

token_saver/core/static_analyzer.py ADDED Viewed

@@ -0,0 +1,273 @@
+"""Static analyzer: scans Python source for token-wasting patterns before execution."""
+from __future__ import annotations
+import ast
+import textwrap
+from dataclasses import dataclass, field
+from pathlib import Path
+@dataclass
+class Finding:
+    """A single rule violation located at ``file``:``line``, with a suggested fix."""
+    file: str
+    line: int
+    rule: str
+    message: str
+    estimated_waste_tokens: int
+    fix: str
+    def format(self) -> str:
+        """Render the finding as indented text ending with the indented fix snippet."""
+        header = [
+            f"  {self.file}:{self.line}  [{self.rule}]",
+            f"  {self.message}",
+            f"  ~{self.estimated_waste_tokens:,} tokens wasted per call",
+            "  Fix:",
+        ]
+        snippet = textwrap.indent(self.fix, "    ")
+        return "\n".join(header) + "\n" + snippet
+@dataclass
+class ScanReport:
+    """All findings collected for one scanned file."""
+    file: str
+    findings: list[Finding] = field(default_factory=list)
+    @property
+    def total_estimated_waste(self) -> int:
+        """Sum of ``estimated_waste_tokens`` over all findings."""
+        total = 0
+        for finding in self.findings:
+            total += finding.estimated_waste_tokens
+        return total
+    def format(self) -> str:
+        """Render the report: a header, each finding, then the total waste line."""
+        count = len(self.findings)
+        if count == 0:
+            return f"tsave: {self.file} -- no issues found"
+        out = [f"tsave: {self.file} -- {count} issue(s)\n"]
+        for finding in self.findings:
+            # Each finding is followed by an empty entry, i.e. a blank line.
+            out.extend((finding.format(), ""))
+        out.append(f"Total estimated waste: ~{self.total_estimated_waste:,} tokens/call")
+        return "\n".join(out)
+# Method names that mark a call as an API call: any ``<expr>.<name>(...)`` whose
+# attribute name is in this set matches, whatever object it is called on.
+_API_CALL_ATTRS = {
+    "create", "stream", "count_tokens",
+}
+# Literal ``model=`` values that trigger the "no-model-routing" rule.
+_EXPENSIVE_MODELS = {"claude-opus-4-8", "claude-opus-4-7", "claude-opus-4-6", "claude-fable-5"}
+def _is_api_call(node: ast.Call) -> bool:
+    """Return True when the call is an attribute call named in ``_API_CALL_ATTRS``."""
+    callee = node.func
+    return isinstance(callee, ast.Attribute) and callee.attr in _API_CALL_ATTRS
+def _get_string_value(node: ast.expr) -> str | None:
+    """Return the value of a string-literal node, or None for any other node."""
+    if not isinstance(node, ast.Constant):
+        return None
+    literal = node.value
+    return literal if isinstance(literal, str) else None
+def _get_keyword(call: ast.Call, name: str) -> ast.keyword | None:
+    """Return the first keyword argument of ``call`` named ``name``, or None."""
+    return next((kw for kw in call.keywords if kw.arg == name), None)
+class _Visitor(ast.NodeVisitor):
+    """AST visitor that collects ``Finding`` objects for token-wasting patterns.
+    Usage: ``visit(tree)`` and then ``finalize()``; the results accumulate in
+    ``self.findings``.
+    """
+    def __init__(self, filename: str, source_lines: list[str]):
+        self.filename = filename
+        self.source_lines = source_lines
+        self.findings: list[Finding] = []
+        # Loop nodes enclosing the node currently being visited, innermost last.
+        self._loop_stack: list[ast.AST] = []
+        # Line numbers of assignments to names containing "system".
+        self._system_assignments: list[int] = []
+        # (line, model) of every API call that has a literal ``model=`` argument.
+        self._seen_models: list[tuple[int, str]] = []
+    def _in_loop(self) -> bool:
+        """Return True while visiting nodes nested inside a loop."""
+        return len(self._loop_stack) > 0
+    def _visit_loop(self, node: ast.AST):
+        """Visit a loop's children with the loop pushed on the loop stack."""
+        self._loop_stack.append(node)
+        self.generic_visit(node)
+        self._loop_stack.pop()
+    def visit_For(self, node: ast.For):
+        self._visit_loop(node)
+    def visit_AsyncFor(self, node: ast.AsyncFor):
+        # ``async for`` repeats its body like any other loop, so calls inside
+        # it are subject to the same in-loop rules.
+        self._visit_loop(node)
+    def visit_While(self, node: ast.While):
+        self._visit_loop(node)
+    def visit_Call(self, node: ast.Call):
+        """Run every per-call rule on API calls, then descend into the arguments."""
+        if _is_api_call(node):
+            self._check_api_in_loop(node)
+            self._check_file_read_in_call(node)
+            self._check_model_routing(node)
+            self._check_no_caching(node)
+        self.generic_visit(node)
+    def visit_Assign(self, node: ast.Assign):
+        """Record assignments whose target name contains "system" (case-insensitive)."""
+        for target in node.targets:
+            if isinstance(target, ast.Name) and "system" in target.id.lower():
+                self._system_assignments.append(node.lineno)
+        self.generic_visit(node)
+    def _check_api_in_loop(self, node: ast.Call):
+        """Rule "api-in-loop": an API call located inside a loop."""
+        if not self._in_loop():
+            return
+        self.findings.append(Finding(
+            file=self.filename,
+            line=node.lineno,
+            rule="api-in-loop",
+            message="API call inside a loop — each iteration sends a full request",
+            estimated_waste_tokens=5000,
+            fix=textwrap.dedent("""\
+                # Batch messages or collect results, then make one call
+                results = []
+                for item in items:
+                    results.append(item)
+                response = client.messages.create(
+                    model="claude-haiku-4-5",
+                    messages=[{"role": "user", "content": "\\n".join(results)}],
+                )"""),
+        ))
+    def _check_file_read_in_call(self, node: ast.Call):
+        """Rule "full-file-per-call": a ``.read()``/``.read_text()`` call nested in the API call."""
+        # Cheap pre-filter on the dumped subtree before walking it node by node.
+        subtree = ast.dump(node)
+        if "read" not in subtree.lower() and "open" not in subtree.lower():
+            return
+        for child in ast.walk(node):
+            if not isinstance(child, ast.Call):
+                continue
+            if child is node:
+                continue
+            if isinstance(child.func, ast.Attribute) and child.func.attr in ("read", "read_text"):
+                self.findings.append(Finding(
+                    file=self.filename,
+                    line=node.lineno,
+                    rule="full-file-per-call",
+                    message="Entire file read and passed in every API call -- chunk or summarize first",
+                    estimated_waste_tokens=10000,
+                    fix=textwrap.dedent("""\
+                        # Read once, chunk, send only relevant parts
+                        content = Path("doc.txt").read_text()
+                        chunks = [content[i:i+4000] for i in range(0, len(content), 4000)]
+                        response = client.messages.create(
+                            messages=[{"role": "user", "content": chunks[0]}],
+                        )"""),
+                ))
+                # One finding per API call is enough.
+                return
+    def _check_model_routing(self, node: ast.Call):
+        """Rule "no-model-routing": an expensive model used for a visibly simple call.
+        A call is considered simple only when ``messages`` is a list literal
+        with at most two elements and no ``tools`` keyword is passed. Calls
+        whose complexity cannot be judged from the literal are not flagged.
+        """
+        model_kw = _get_keyword(node, "model")
+        if model_kw is None:
+            return
+        model_val = _get_string_value(model_kw.value)
+        if model_val is None:
+            return
+        self._seen_models.append((node.lineno, model_val))
+        if model_val not in _EXPENSIVE_MODELS:
+            return
+        msg_kw = _get_keyword(node, "messages")
+        is_simple = (
+            msg_kw is not None
+            and isinstance(msg_kw.value, ast.List)
+            and len(msg_kw.value.elts) <= 2
+            and _get_keyword(node, "tools") is None
+        )
+        if not is_simple:
+            return
+        self.findings.append(Finding(
+            file=self.filename,
+            line=node.lineno,
+            rule="no-model-routing",
+            message=f"Using {model_val} for a simple call — Haiku may suffice",
+            estimated_waste_tokens=0,
+            fix=textwrap.dedent(f"""\
+                # Route by complexity
+                model = "claude-haiku-4-5"  # simple tasks
+                # model = "{model_val}"     # complex tasks only"""),
+        ))
+    def _check_no_caching(self, node: ast.Call):
+        """Rule "uncached-system-prompt": ``system=`` passed in a loop without cache_control."""
+        sys_kw = _get_keyword(node, "system")
+        if sys_kw is None:
+            return
+        has_cache = False
+        # Only a list literal of dict literals can be inspected statically; any
+        # other expression is treated as uncached.
+        if isinstance(sys_kw.value, ast.List):
+            for elt in sys_kw.value.elts:
+                if isinstance(elt, ast.Dict):
+                    for key in elt.keys:
+                        if isinstance(key, ast.Constant) and key.value == "cache_control":
+                            has_cache = True
+        if not has_cache and self._in_loop():
+            self.findings.append(Finding(
+                file=self.filename,
+                line=node.lineno,
+                rule="uncached-system-prompt",
+                message="System prompt sent in loop without cache_control — reparsed every call",
+                estimated_waste_tokens=2000,
+                fix=textwrap.dedent("""\
+                    system=[{
+                        "type": "text",
+                        "text": system_prompt,
+                        "cache_control": {"type": "ephemeral"},
+                    }]"""),
+            ))
+    def finalize(self):
+        """Run the whole-file rules; call once after ``visit``."""
+        if len(self._system_assignments) > 1:
+            self.findings.append(Finding(
+                file=self.filename,
+                line=self._system_assignments[-1],
+                rule="system-prompt-redefined",
+                message=f"System prompt assigned {len(self._system_assignments)} times — define once and cache",
+                estimated_waste_tokens=2000,
+                fix=textwrap.dedent("""\
+                    # Define once at module level with cache_control
+                    SYSTEM = [{"type": "text", "text": prompt, "cache_control": {"type": "ephemeral"}}]"""),
+            ))
+        self._check_uncompressed_history()
+    def _check_uncompressed_history(self):
+        """Rule "uncompressed-history": text heuristic over the raw source lines.
+        Flags the first line that appends to something named like "messages"
+        when the surrounding lines contain a loop or chat-function keyword and
+        no mention of compressing, summarizing or compacting.
+        """
+        for i, line in enumerate(self.source_lines, 1):
+            stripped = line.strip()
+            if ".append(" in stripped and "messages" in stripped.lower():
+                # ``i`` is 1-based, so this window spans a few lines on each
+                # side of the matching line.
+                context_start = max(0, i - 5)
+                context = "\n".join(self.source_lines[context_start:i + 5])
+                if "compres" not in context.lower() and "summar" not in context.lower() and "compact" not in context.lower():
+                    has_loop = any(
+                        kw in context for kw in ("for ", "while ", "def chat", "def conversation")
+                    )
+                    if has_loop:
+                        self.findings.append(Finding(
+                            file=self.filename,
+                            line=i,
+                            rule="uncompressed-history",
+                            message="Messages appended in a loop without compression — history grows unbounded",
+                            estimated_waste_tokens=8000,
+                            fix=textwrap.dedent("""\
+                                # Compress history when it grows large
+                                if len(messages) > 20:
+                                    result = client.compress(model=model, messages=messages)
+                                    messages = result.compressed_messages"""),
+                        ))
+                        # Report at most one such finding per file.
+                        return
+def scan_source(source: str, filename: str = "<stdin>") -> ScanReport:
+    """Scan Python source text and return its report.
+    Source that fails to parse with a SyntaxError yields an empty report.
+    """
+    try:
+        module = ast.parse(source)
+    except SyntaxError:
+        return ScanReport(file=filename)
+    checker = _Visitor(filename, source.splitlines())
+    checker.visit(module)
+    checker.finalize()
+    return ScanReport(file=filename, findings=checker.findings)
+def scan_file(path: str | Path) -> ScanReport:
+    """Read ``path`` as UTF-8 text and scan it, using the path as the report's file name."""
+    target = Path(path)
+    return scan_source(target.read_text(encoding="utf-8"), str(target))

token_saver/core/tokenizer.py ADDED Viewed

@@ -0,0 +1,130 @@
+"""Token counting, cost estimation, and monthly projection using the Anthropic API."""
+from __future__ import annotations
+from dataclasses import dataclass
+import anthropic
+# Per-model (input rate, output rate). The cost properties in this module
+# multiply a token count by the rate and divide by 1_000_000, i.e. each rate
+# is a price per million tokens.
+PRICING: dict[str, tuple[float, float]] = {
+    "claude-fable-5":      (10.00, 50.00),
+    "claude-mythos-5":     (10.00, 50.00),
+    "claude-opus-4-8":     (5.00, 25.00),
+    "claude-opus-4-7":     (5.00, 25.00),
+    "claude-opus-4-6":     (5.00, 25.00),
+    "claude-sonnet-4-6":   (3.00, 15.00),
+    "claude-haiku-4-5":    (1.00, 5.00),
+}
+# NOTE(review): presumably multipliers on the input rate for cache reads and
+# cache writes; neither constant is referenced in this module -- confirm usage.
+CACHE_READ_DISCOUNT = 0.1
+CACHE_WRITE_MULTIPLIER = 1.25
+@dataclass
+class TokenCount:
+    """Input-token count for a request together with the model it was counted for."""
+    input_tokens: int
+    model: str
+    @property
+    def input_cost(self) -> float:
+        """Estimated input cost; models missing from ``PRICING`` use (3.00, 15.00)."""
+        per_million = PRICING.get(self.model, (3.00, 15.00))[0]
+        return self.input_tokens * per_million / 1_000_000
+    def format(self) -> str:
+        """Render the count and its estimated cost on one line."""
+        tokens, cost = self.input_tokens, self.input_cost
+        return f"{tokens:,} input tokens | est. ${cost:.4f}"
+@dataclass
+class CostEstimate:
+    """Cost estimate built from counted input tokens and an assumed output size."""
+    input_tokens: int
+    estimated_output_tokens: int
+    model: str
+    def _rate(self, position: int) -> float:
+        # Position 0 is the input rate, 1 the output rate; models missing
+        # from PRICING use the (3.00, 15.00) fallback.
+        return PRICING.get(self.model, (3.00, 15.00))[position]
+    @property
+    def input_cost(self) -> float:
+        """Cost of the input tokens."""
+        return self.input_tokens * self._rate(0) / 1_000_000
+    @property
+    def output_cost(self) -> float:
+        """Cost of the estimated output tokens."""
+        return self.estimated_output_tokens * self._rate(1) / 1_000_000
+    @property
+    def total_cost(self) -> float:
+        """Sum of ``input_cost`` and ``output_cost``."""
+        return self.input_cost + self.output_cost
+    def format(self) -> str:
+        """Render a three-line input/output/total breakdown."""
+        rows = (
+            f"Input:  {self.input_tokens:>10,} tokens  ${self.input_cost:.4f}",
+            f"Output: {self.estimated_output_tokens:>10,} tokens  ${self.output_cost:.4f}  (est.)",
+            f"Total:  {'':>10}          ${self.total_cost:.4f}",
+        )
+        return "\n".join(rows)
+@dataclass
+class MonthlyProjection:
+    """Spend projection from a per-request cost and a daily request volume."""
+    cost_per_request: float
+    requests_per_day: int
+    days: int = 30
+    @property
+    def daily_cost(self) -> float:
+        """Cost of one day at ``requests_per_day`` requests."""
+        return self.cost_per_request * self.requests_per_day
+    @property
+    def monthly_cost(self) -> float:
+        """Cost of ``days`` days at the daily rate."""
+        return self.daily_cost * self.days
+    def format(self) -> str:
+        """Render the per-request, daily and monthly figures."""
+        rows = [
+            f"Per request: ${self.cost_per_request:.4f}",
+            f"Daily ({self.requests_per_day} req/day): ${self.daily_cost:.2f}",
+            f"Monthly ({self.days} days): ${self.monthly_cost:.2f}",
+        ]
+        return "\n".join(rows)
+def count_tokens(
+    client: anthropic.Anthropic,
+    *,
+    model: str,
+    messages: list[dict],
+    system: str | list[dict] | None = None,
+    tools: list[dict] | None = None,
+) -> TokenCount:
+    """Count a request's input tokens via ``client.messages.count_tokens``.
+    ``system`` and ``tools`` are only sent when they are not None.
+    """
+    request: dict = {"model": model, "messages": messages}
+    for key, value in (("system", system), ("tools", tools)):
+        if value is not None:
+            request[key] = value
+    counted = client.messages.count_tokens(**request)
+    return TokenCount(input_tokens=counted.input_tokens, model=model)
+def estimate_cost(
+    client: anthropic.Anthropic,
+    *,
+    model: str,
+    messages: list[dict],
+    estimated_output_tokens: int = 1000,
+    system: str | list[dict] | None = None,
+    tools: list[dict] | None = None,
+) -> CostEstimate:
+    """Count the request's input tokens and pair them with an assumed output size."""
+    counted = count_tokens(client, model=model, messages=messages, system=system, tools=tools)
+    input_tokens = counted.input_tokens
+    return CostEstimate(
+        input_tokens=input_tokens,
+        estimated_output_tokens=estimated_output_tokens,
+        model=model,
+    )
+def monthly_projection(
+    cost_per_request: float,
+    requests_per_day: int,
+    days: int = 30,
+) -> MonthlyProjection:
+    """Build a ``MonthlyProjection`` from a per-request cost and request volume."""
+    return MonthlyProjection(cost_per_request, requests_per_day, days)

tsave-0.1.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,9 @@
+Metadata-Version: 2.4
+Name: tsave
+Version: 0.1.1
+Summary: Drop-in Anthropic client wrapper with token counting, cost analysis, and semantic compression
+License-File: LICENSE
+Requires-Python: >=3.10
+Requires-Dist: anthropic>=0.40.0
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == 'dev'

tsave-0.1.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+token_saver/__init__.py,sha256=xfH9FtGMJ6tEywgI96niLPtULSXTAf5JOnfp4n-AR6U,69
+token_saver/cli.py,sha256=qR7N0EXka9KLyfQMuxcWmL5wJN7Ig4inIFQmDt6kgN0,718
+token_saver/client.py,sha256=YdWSdT9y4juI0AHTAomFMQYMYc0DnMP7xDVlJ2XWc9Q,4894
+token_saver/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+token_saver/core/analyzer.py,sha256=yLqCenRJL30pXJVOM2kYrGnEFApU-eaE1ZmaW_SeWek,5731
+token_saver/core/compressor.py,sha256=slk8aVHITfU-Du6BamqSqyF1JDjXXpXZGue7_IqzSr8,5659
+token_saver/core/static_analyzer.py,sha256=XFP0ZqCRYisTolR_yn46TdpEgUv5oIwfND3lqydhMPw,10289
+token_saver/core/tokenizer.py,sha256=et2ROwTnFmAPSRRlYZ_LhU5WIY1Qzye7hu1zu3zXvdE,3595
+tsave-0.1.1.dist-info/METADATA,sha256=qiAcFMNhhFW21h9gQcITPNPu-47bY2o2ABbqB1GcqLY,294
+tsave-0.1.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+tsave-0.1.1.dist-info/entry_points.txt,sha256=VLggJfmMFeHKMlOxCSChaMX4YJkv-oIqtP6rZ4cym4I,47
+tsave-0.1.1.dist-info/licenses/LICENSE,sha256=Ix-GirRZv0ZcXBD7Xzo8OipKG5xK3CXRBp6xnv14Ryw,1068
+tsave-0.1.1.dist-info/RECORD,,

tsave-0.1.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

tsave-0.1.1.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+tsave = token_saver.cli:main

tsave-0.1.1.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 Remo Pulcini
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.