PyPI - tokenmeter-cli - Versions diffs - 0.2.0__py3-none-any.whl - Mend

tokenmeter-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

tokenmeter/__init__.py +19 -0
tokenmeter/__main__.py +4 -0
tokenmeter/cli.py +135 -0
tokenmeter/encoder.py +52 -0
tokenmeter/inputs.py +41 -0
tokenmeter/meter.py +57 -0
tokenmeter/pricing.py +67 -0
tokenmeter/render.py +51 -0
tokenmeter_cli-0.2.0.dist-info/METADATA +118 -0
tokenmeter_cli-0.2.0.dist-info/RECORD +13 -0
tokenmeter_cli-0.2.0.dist-info/WHEEL +4 -0
tokenmeter_cli-0.2.0.dist-info/entry_points.txt +2 -0
tokenmeter_cli-0.2.0.dist-info/licenses/LICENSE +21 -0

tokenmeter/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""tokenmeter: count tokens and estimate cost for prompts before sending them."""
+from tokenmeter.meter import Measurement, measure, over_budget, total_cost
+from tokenmeter.pricing import ModelPrice, known_models, price_for
+from tokenmeter.pricing import total_cost as cost
+# Package version; the CLI prints it for --version.
+__version__ = "0.2.0"
+# Public API. Two cost helpers are exported on purpose:
+#   * ``total_cost`` comes from tokenmeter.meter and sums a list of Measurement objects;
+#   * ``cost`` is tokenmeter.pricing.total_cost, which prices raw token counts for a model.
+__all__ = [
+    "Measurement",
+    "ModelPrice",
+    "__version__",
+    "cost",
+    "known_models",
+    "measure",
+    "over_budget",
+    "price_for",
+    "total_cost",
+]

tokenmeter/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from tokenmeter.cli import entrypoint
+# Run the CLI when the package is executed as ``python -m tokenmeter``.
+if __name__ == "__main__":
+    entrypoint()

tokenmeter/cli.py ADDED Viewed

@@ -0,0 +1,135 @@
+"""Command-line interface for tokenmeter."""
+from __future__ import annotations
+import json
+import sys
+import typer
+from rich.console import Console
+from rich.table import Table
+from tokenmeter import __version__
+from tokenmeter.encoder import EncoderError, encoder_for_model
+from tokenmeter.inputs import read_inputs
+from tokenmeter.meter import measure, over_budget, total_cost
+from tokenmeter.pricing import (
+    PRICES_AS_OF,
+    UnknownModel,
+    known_models,
+    price_for,
+)
+from tokenmeter.render import measurements_to_json, render_table
+# Root Typer application. Shell completion options are disabled, and running
+# the program with no arguments shows the help text.
+app = typer.Typer(
+    add_completion=False,
+    no_args_is_help=True,
+    help="Count tokens and estimate cost for prompts before you send them.",
+)
+# Two consoles: _out writes to stdout, _err writes to stderr.
+_out = Console()
+_err = Console(stderr=True)
+# Process exit codes used by the commands in this module.
+EXIT_OK = 0
+EXIT_OVER_BUDGET = 1
+EXIT_BAD_INPUT = 2
+def _version_callback(value: bool) -> None:
+    """Print the package version and exit when ``--version`` was passed."""
+    if not value:
+        return
+    _out.print(f"tokenmeter {__version__}")
+    raise typer.Exit()
+@app.callback()
+def main(
+    # Eager flag: _version_callback prints the version and raises typer.Exit.
+    _version: bool = typer.Option(
+        False,
+        "--version",
+        callback=_version_callback,
+        is_eager=True,
+        help="Show the version and exit.",
+    ),
+) -> None:
+    """tokenmeter command-line interface."""
+    # Intentionally empty: this callback only hosts the --version option.
+def _collect(paths: list[str], model: str, output_tokens: int) -> list:
+    """Measure every requested input against ``model``.
+
+    Returns one measurement per input, in the order ``read_inputs`` yields
+    them. Exceptions raised by the model lookup, by reading the inputs and by
+    the encoder propagate to the caller.
+    """
+    # Resolve the model first: constructing the encoder is cheap (the encoding
+    # itself is loaded lazily), and an unknown model should be reported before
+    # any input is read -- in particular before blocking on standard input.
+    encoder = encoder_for_model(model)
+    inputs = read_inputs(paths)
+    return [
+        measure(encoder, model, name, text, output_tokens=output_tokens) for name, text in inputs
+    ]
+@app.command("count")
+def count(
+    paths: list[str] = typer.Argument(..., help="Files, directories, or - for stdin."),
+    model: str = typer.Option("gpt-4o", "--model", "-m", help="Model to price against."),
+    output_tokens: int = typer.Option(
+        0, "--output-tokens", min=0, help="Assumed completion tokens, for cost only."
+    ),
+    as_json: bool = typer.Option(False, "--json", help="Emit JSON."),
+) -> None:
+    """Count tokens and estimate cost for one or more inputs."""
+    # The docstring above doubles as the command's help text, so the details
+    # live in comments. Results go to stdout as a table, or as JSON with
+    # --json. Every failure handled below is reported on stderr and exits with
+    # EXIT_BAD_INPUT. A negative --output-tokens is rejected by the option
+    # itself (min=0), since it would produce a negative cost.
+    try:
+        measurements = _collect(paths, model, output_tokens)
+    except UnknownModel as exc:
+        # markup=False: the message echoes user-supplied text, which may
+        # contain "[...]" that Rich would otherwise treat as markup.
+        _err.print(f"tokenmeter: {exc}; try 'tokenmeter models'", markup=False)
+        raise typer.Exit(EXIT_BAD_INPUT) from exc
+    except UnicodeDecodeError as exc:
+        # Input files are read as UTF-8. A decode failure is a ValueError, not
+        # an OSError, so without this clause a binary or mis-encoded input
+        # would end in a traceback instead of a clean error.
+        _err.print(f"tokenmeter: input is not valid UTF-8 text: {exc}", markup=False)
+        raise typer.Exit(EXIT_BAD_INPUT) from exc
+    except (OSError, EncoderError) as exc:
+        _err.print(f"tokenmeter: {exc}", markup=False)
+        raise typer.Exit(EXIT_BAD_INPUT) from exc
+    if as_json:
+        _out.print_json(json.dumps(measurements_to_json(measurements)))
+    else:
+        _out.print(render_table(measurements))
+@app.command("budget")
+def budget(
+    paths: list[str] = typer.Argument(..., help="Files, directories, or - for stdin."),
+    max_cost: float = typer.Option(..., "--max-cost", help="Fail above this USD cost."),
+    model: str = typer.Option("gpt-4o", "--model", "-m", help="Model to price against."),
+    output_tokens: int = typer.Option(
+        0, "--output-tokens", min=0, help="Assumed completion tokens."
+    ),
+) -> None:
+    """Fail when the estimated cost of the inputs exceeds a budget."""
+    # The docstring above doubles as the command's help text, so the details
+    # live in comments. The estimate is always printed on stderr; the command
+    # exits with EXIT_OVER_BUDGET only when the estimate is strictly greater
+    # than --max-cost, and with EXIT_BAD_INPUT for the failures handled below.
+    # A negative --output-tokens is rejected by the option itself (min=0).
+    try:
+        measurements = _collect(paths, model, output_tokens)
+    except UnknownModel as exc:
+        # markup=False: the message echoes user-supplied text, which may
+        # contain "[...]" that Rich would otherwise treat as markup.
+        _err.print(f"tokenmeter: {exc}; try 'tokenmeter models'", markup=False)
+        raise typer.Exit(EXIT_BAD_INPUT) from exc
+    except UnicodeDecodeError as exc:
+        # Input files are read as UTF-8. A decode failure is a ValueError, not
+        # an OSError, so without this clause a binary or mis-encoded input
+        # would end in a traceback instead of a clean error.
+        _err.print(f"tokenmeter: input is not valid UTF-8 text: {exc}", markup=False)
+        raise typer.Exit(EXIT_BAD_INPUT) from exc
+    except (OSError, EncoderError) as exc:
+        _err.print(f"tokenmeter: {exc}", markup=False)
+        raise typer.Exit(EXIT_BAD_INPUT) from exc
+    cost = total_cost(measurements)
+    _err.print(f"tokenmeter: estimated ${cost:.6f} against a ${max_cost:.6f} budget")
+    if over_budget(measurements, max_cost):
+        raise typer.Exit(EXIT_OVER_BUDGET)
+@app.command("models")
+def models() -> None:
+    """List the known models and their prices."""
+    # One row per known model, in the order known_models() returns them.
+    table = Table(
+        box=None,
+        pad_edge=False,
+        title=f"prices as of {PRICES_AS_OF} (USD per 1M tokens)",
+    )
+    # "left" is the default justification of Table.add_column.
+    for header, justify in (
+        ("model", "left"),
+        ("encoding", "left"),
+        ("input", "right"),
+        ("output", "right"),
+    ):
+        table.add_column(header, justify=justify)
+    for price in map(price_for, known_models()):
+        table.add_row(
+            price.model,
+            price.encoding,
+            f"${price.input_per_mtok:g}",
+            f"${price.output_per_mtok:g}",
+        )
+    _out.print(table)
+def entrypoint() -> None:
+    """Run the Typer app; on Ctrl-C print a notice to stderr and exit with status 130."""
+    try:
+        app()
+    except KeyboardInterrupt:  # pragma: no cover - interactive only
+        sys.stderr.write("tokenmeter: interrupted\n")
+        raise SystemExit(130) from None

tokenmeter/encoder.py ADDED Viewed

@@ -0,0 +1,52 @@
+"""Token counting behind a small interface.
+The real encoder uses ``tiktoken``, imported lazily so the package installs and
+imports without it and so the test suite can run with a fake encoder and no
+network access. Counting is therefore exact for the supported OpenAI encodings
+at run time, and deterministic in tests.
+"""
+from __future__ import annotations
+from typing import Protocol
+from tokenmeter.pricing import price_for
+class Encoder(Protocol):
+    """Structural interface for anything that can count the tokens in a text."""
+    def count(self, text: str) -> int:
+        """Return the number of tokens in ``text``."""
+        ...
+class EncoderError(RuntimeError):
+    """Signals that the token encoder could not be set up."""
+class TiktokenEncoder:
+    """Count tokens with tiktoken for a given encoding name.
+
+    The tiktoken module and the encoding are loaded on the first call that
+    needs them, so constructing an instance never imports tiktoken.
+    """
+    def __init__(self, encoding: str) -> None:
+        self.encoding = encoding
+        # Filled in by _ensure() on first use.
+        self._enc = None
+    def _ensure(self):
+        """Return the tiktoken encoding, loading and caching it on first use.
+
+        Raises:
+            EncoderError: if tiktoken cannot be imported or the encoding
+                cannot be loaded.
+        """
+        if self._enc is not None:
+            return self._enc
+        try:
+            import tiktoken
+        except ImportError as exc:  # pragma: no cover - import guard
+            raise EncoderError(
+                "tiktoken is not installed; install tokenmeter with its default "
+                "dependencies to count tokens"
+            ) from exc
+        try:
+            self._enc = tiktoken.get_encoding(self.encoding)
+        except Exception as exc:  # pragma: no cover - needs network on first use
+            raise EncoderError(f"could not load encoding {self.encoding!r}") from exc
+        return self._enc
+    def count(self, text: str) -> int:
+        """Return the number of tokens in ``text``.
+
+        Text that happens to spell a special token (for example
+        ``<|endoftext|>``) is counted as ordinary text.
+        """
+        # By default tiktoken's encode() raises ValueError when the text
+        # contains a special token; disallowed_special=() turns that check off
+        # so arbitrary prompt files can be measured.
+        return len(self._ensure().encode(text, disallowed_special=()))
+def encoder_for_model(model: str) -> Encoder:
+    """Build the encoder for the encoding that the price table lists for ``model``.
+
+    Whatever ``price_for`` raises for a model it does not know propagates.
+    """
+    encoding_name = price_for(model).encoding
+    return TiktokenEncoder(encoding_name)

tokenmeter/inputs.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""Gather text inputs from files, directories, or standard input."""
+from __future__ import annotations
+import sys
+from collections.abc import Iterable, Sequence
+from pathlib import Path
+# Lower-case suffixes that count as text when a directory is expanded; a file
+# passed explicitly is read whatever its suffix.
+TEXT_SUFFIXES = {".txt", ".md", ".prompt", ".jinja", ".j2", ".tmpl"}
+def read_inputs(
+    paths: Sequence[str | Path],
+    *,
+    stdin_text: str | None = None,
+) -> list[tuple[str, str]]:
+    """Collect ``(name, text)`` pairs for the requested inputs, in order.
+
+    ``-`` stands for standard input and is named ``<stdin>``; its text is
+    ``stdin_text`` when that is given, otherwise it is read from ``sys.stdin``.
+    A directory contributes its text-like files in sorted order; any other
+    path is read as a single UTF-8 file.
+    """
+    collected: list[tuple[str, str]] = []
+    for entry in paths:
+        if str(entry) == "-":
+            if stdin_text is None:
+                collected.append(("<stdin>", sys.stdin.read()))
+            else:
+                collected.append(("<stdin>", stdin_text))
+            continue
+        target = Path(entry)
+        files = _text_files(target) if target.is_dir() else [target]
+        collected.extend((str(item), item.read_text(encoding="utf-8")) for item in files)
+    return collected
+def _text_files(directory: Path) -> Iterable[Path]:
+    """Return the files below ``directory`` (recursively) with a text suffix, sorted."""
+    matches = [
+        candidate
+        for candidate in directory.rglob("*")
+        if candidate.is_file() and candidate.suffix.lower() in TEXT_SUFFIXES
+    ]
+    matches.sort()
+    return matches

tokenmeter/meter.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""Combine token counts with prices into measurements and a budget gate."""
+from __future__ import annotations
+from dataclasses import dataclass
+from tokenmeter.encoder import Encoder
+from tokenmeter.pricing import input_cost, output_cost
+@dataclass(frozen=True, slots=True)
+class Measurement:
+    """Immutable token counts and costs recorded for one named input."""
+    name: str
+    model: str
+    input_tokens: int
+    output_tokens: int
+    input_cost: float
+    output_cost: float
+    @property
+    def total_tokens(self) -> int:
+        """Input tokens plus output tokens."""
+        return self.input_tokens + self.output_tokens
+    @property
+    def total_cost(self) -> float:
+        """Input cost plus output cost."""
+        return self.input_cost + self.output_cost
+def measure(
+    encoder: Encoder,
+    model: str,
+    name: str,
+    text: str,
+    *,
+    output_tokens: int = 0,
+) -> Measurement:
+    """Count the tokens of ``text`` and price them for ``model``.
+
+    ``output_tokens`` is taken as given and only priced; ``name`` labels the
+    resulting measurement.
+    """
+    counted = encoder.count(text)
+    prompt_cost = input_cost(model, counted)
+    completion_cost = output_cost(model, output_tokens)
+    return Measurement(
+        name=name,
+        model=model,
+        input_tokens=counted,
+        output_tokens=output_tokens,
+        input_cost=prompt_cost,
+        output_cost=completion_cost,
+    )
+def total_cost(measurements: list[Measurement]) -> float:
+    """Return the sum of ``total_cost`` over ``measurements``.
+
+    The sum starts from ``0.0`` so that an empty list yields a float, as the
+    annotation promises, rather than the integer ``0`` a bare ``sum`` returns.
+    """
+    return sum((m.total_cost for m in measurements), 0.0)
+def total_tokens(measurements: list[Measurement]) -> int:
+    """Return the sum of ``total_tokens`` over ``measurements``."""
+    running = 0
+    for item in measurements:
+        running = running + item.total_tokens
+    return running
+def over_budget(measurements: list[Measurement], max_cost: float) -> bool:
+    """Return True when the combined cost is strictly greater than ``max_cost``."""
+    spent = total_cost(measurements)
+    return spent > max_cost

tokenmeter/pricing.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""Per-model token prices and the cost arithmetic on top of them.
+Prices are expressed in US dollars per million tokens and carry an "as of"
+date so a stale table is obvious. The numbers are easy to override or extend;
+the cost functions are pure and do not care where the rates came from.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+PRICES_AS_OF = "2025-08-01"
+@dataclass(frozen=True, slots=True)
+class ModelPrice:
+    """Input and output price in USD per million tokens."""
+    # Model name; the price table uses the same string as its key.
+    model: str
+    # Name of the token encoding used to count tokens for this model.
+    encoding: str
+    # USD per 1,000,000 input tokens.
+    input_per_mtok: float
+    # USD per 1,000,000 output tokens.
+    output_per_mtok: float
+# A small, explicit table keyed by model name. Values are USD per 1,000,000 tokens.
+_PRICES: dict[str, ModelPrice] = {
+    entry.model: entry
+    for entry in (
+        ModelPrice("gpt-4o", "o200k_base", 2.50, 10.00),
+        ModelPrice("gpt-4o-mini", "o200k_base", 0.15, 0.60),
+        ModelPrice("gpt-4-turbo", "cl100k_base", 10.00, 30.00),
+        ModelPrice("gpt-3.5-turbo", "cl100k_base", 0.50, 1.50),
+        ModelPrice("text-embedding-3-small", "cl100k_base", 0.02, 0.0),
+        ModelPrice("text-embedding-3-large", "cl100k_base", 0.13, 0.0),
+    )
+}
+class UnknownModel(KeyError):
+    """Lookup error for a model name that is absent from the price table."""
+    def __init__(self, model: str) -> None:
+        super().__init__(model)
+        self.model = model
+    def __str__(self) -> str:
+        # Replaces KeyError's default rendering, which shows the repr of the key.
+        return "unknown model: {}".format(self.model)
+def known_models() -> list[str]:
+    """Return the model names in the price table, sorted."""
+    names = list(_PRICES)
+    names.sort()
+    return names
+def price_for(model: str) -> ModelPrice:
+    """Return the price entry for ``model``.
+
+    Raises:
+        UnknownModel: if ``model`` is not in the price table.
+    """
+    try:
+        entry = _PRICES[model]
+    except KeyError as missing:
+        raise UnknownModel(model) from missing
+    return entry
+def input_cost(model: str, tokens: int) -> float:
+    """Return the USD cost of ``tokens`` input tokens for ``model``."""
+    rate = price_for(model).input_per_mtok
+    return rate * tokens / 1_000_000
+def output_cost(model: str, tokens: int) -> float:
+    """Return the USD cost of ``tokens`` output tokens for ``model``."""
+    rate = price_for(model).output_per_mtok
+    return rate * tokens / 1_000_000
+def total_cost(model: str, input_tokens: int, output_tokens: int = 0) -> float:
+    """Return the combined USD cost of the input and output tokens for ``model``."""
+    prompt_part = input_cost(model, input_tokens)
+    completion_part = output_cost(model, output_tokens)
+    return prompt_part + completion_part

tokenmeter/render.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""Render measurements for the terminal and as JSON."""
+from __future__ import annotations
+from rich.console import Group
+from rich.table import Table
+from tokenmeter.meter import Measurement, total_cost, total_tokens
+def measurements_to_json(measurements: list[Measurement]) -> dict:
+    """Convert measurements into a JSON-serialisable dict.
+
+    Each input gets one entry under ``"inputs"``; the totals follow. All costs
+    are rounded to six decimal places.
+    """
+    rows = []
+    for item in measurements:
+        rows.append(
+            {
+                "name": item.name,
+                "model": item.model,
+                "input_tokens": item.input_tokens,
+                "output_tokens": item.output_tokens,
+                "input_cost": round(item.input_cost, 6),
+                "output_cost": round(item.output_cost, 6),
+                "total_cost": round(item.total_cost, 6),
+            }
+        )
+    return {
+        "inputs": rows,
+        "total_tokens": total_tokens(measurements),
+        "total_cost": round(total_cost(measurements), 6),
+    }
+def render_table(measurements: list[Measurement]) -> Group:
+    """Build the terminal table for ``measurements``.
+
+    One row per measurement with its name, input tokens, output tokens and
+    total cost. A separated ``total`` row is appended unless there is exactly
+    one measurement. The table is returned wrapped in a ``Group``.
+    """
+    # Local import keeps this block self-contained. Input names are file paths
+    # and may contain "[...]"; unescaped, Rich parses that as console markup
+    # and drops or rejects part of the name.
+    from rich.markup import escape
+    table = Table(box=None, pad_edge=False)
+    table.add_column("input")
+    table.add_column("in tok", justify="right")
+    table.add_column("out tok", justify="right")
+    table.add_column("cost (USD)", justify="right")
+    for m in measurements:
+        table.add_row(
+            escape(m.name),
+            f"{m.input_tokens}",
+            f"{m.output_tokens}",
+            f"${m.total_cost:.6f}",
+        )
+    if len(measurements) != 1:
+        table.add_section()
+        table.add_row(
+            "total",
+            f"{sum(m.input_tokens for m in measurements)}",
+            f"{sum(m.output_tokens for m in measurements)}",
+            f"${total_cost(measurements):.6f}",
+        )
+    return Group(table)

tokenmeter_cli-0.2.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,118 @@
+Metadata-Version: 2.4
+Name: tokenmeter-cli
+Version: 0.2.0
+Summary: Count tokens and estimate cost for prompts before you send them.
+Project-URL: Homepage, https://github.com/jmweb-org/tokenmeter
+Project-URL: Repository, https://github.com/jmweb-org/tokenmeter
+Project-URL: Issues, https://github.com/jmweb-org/tokenmeter/issues
+Author: José del Río
+License: MIT License
+        Copyright (c) 2026 José del Río
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Keywords: budget,cli,cost,llm,openai,tiktoken,tokens
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Utilities
+Requires-Python: >=3.10
+Requires-Dist: rich>=13.0
+Requires-Dist: tiktoken>=0.7
+Requires-Dist: typer>=0.12
+Description-Content-Type: text/markdown
+# tokenmeter
+[![CI](https://github.com/jmweb-org/tokenmeter/actions/workflows/ci.yml/badge.svg)](https://github.com/jmweb-org/tokenmeter/actions/workflows/ci.yml)
+[![PyPI](https://img.shields.io/pypi/v/tokenmeter-cli.svg)](https://pypi.org/project/tokenmeter-cli/)
+[![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org)
+[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
+Count tokens and estimate cost for prompts before you send them, from the
+command line or as a CI budget gate.
+Prompt templates grow, a few-shot example gets added, a retrieved context
+balloons, and suddenly every call costs more than you thought. `tokenmeter`
+gives you the exact token count and a dollar estimate up front, for a single
+prompt or a whole directory of templates.
+```console
+$ tokenmeter count prompts/system.txt --model gpt-4o
+input                in tok   out tok   cost (USD)
+prompts/system.txt      812         0    $0.002030
+$ tokenmeter count prompts/ --model gpt-4o-mini --json
+```
+## Install
+```console
+$ pip install tokenmeter-cli                 # from PyPI
+$ pip install git+https://github.com/jmweb-org/tokenmeter   # latest from the repository
+```
+Token counting is exact for the supported OpenAI encodings via `tiktoken`.
+## Usage
+```console
+$ tokenmeter count system.txt -m gpt-4o          # one file
+$ tokenmeter count prompts/ -m gpt-4o-mini       # every text file in a directory
+$ cat prompt.txt | tokenmeter count - -m gpt-4o  # standard input
+$ tokenmeter count p.txt --output-tokens 500     # include an assumed completion
+$ tokenmeter models                              # list models and prices
+```
+### As a budget gate
+Fail a build when a prompt set would cost more than you allow:
+```console
+$ tokenmeter budget prompts/ --model gpt-4o --max-cost 0.05
+```
+```yaml
+- run: tokenmeter budget prompts/ --model gpt-4o --max-cost 0.05
+```
+## Cost model
+Counts are real tokens. Cost multiplies tokens by a per-model rate from a small,
+dated price table (`tokenmeter models` prints it with its "as of" date). By
+default only input tokens are counted; pass `--output-tokens N` to add an
+assumed completion length to the estimate. Prices change, so treat the dollar
+figures as estimates and update the table when they move.
+## Exit codes
+| Code | Meaning |
+| --- | --- |
+| 0 | Counted; under budget (or `count` was used) |
+| 1 | `budget` estimate exceeded `--max-cost` |
+| 2 | An input could not be read, the model is unknown, or the encoder could not be loaded |
+## License
+MIT. See [LICENSE](LICENSE).

tokenmeter_cli-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+tokenmeter/__init__.py,sha256=ldifpdHQhVMiQ8V9qi91UJkjDto7XYyhhR7L9I7XxM0,470
+tokenmeter/__main__.py,sha256=5uZgM542ygj2c6D6uKvQmqmQzo7uvG3qu5cLMb_zc68,83
+tokenmeter/cli.py,sha256=4PDR9KLvZprG2P4aSSC48iZiZKZcoNf822_SJlj3F8Q,4234
+tokenmeter/encoder.py,sha256=1CPu9ZHLQAaL8-3-wAOsSAOvteD8TzEl0_oeq3zCGkU,1628
+tokenmeter/inputs.py,sha256=mTH9LgUqqxP5Wvl87-P6lGyG35F09C3ygg-12NEJ4Bg,1247
+tokenmeter/meter.py,sha256=F7t7Ed64hMmTPckiEASYOGB5J5OHBVs2B3k8VLoxx7Y,1394
+tokenmeter/pricing.py,sha256=H4ovbPi1gzlDMTbBkWxOSCtnDAgGpP1Q3ftchOo625E,2062
+tokenmeter/render.py,sha256=a3Xx9HGeD_8-lP-HeRcE0iwUiRa8d-t15lTMWLjC-fk,1614
+tokenmeter_cli-0.2.0.dist-info/METADATA,sha256=v3NRDEch9Cl66SATXhhLXIYoqp8sIVXpJRkcM0bHG0I,4733
+tokenmeter_cli-0.2.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+tokenmeter_cli-0.2.0.dist-info/entry_points.txt,sha256=6NQkcMw35tZQW5WpX-MSx-PYG2WIyLwJJkIPFqqpPq8,57
+tokenmeter_cli-0.2.0.dist-info/licenses/LICENSE,sha256=N4nJy_wSxYwULjDvuE2GupQWZSSwgOOU_HJSzuxHBsI,1071
+tokenmeter_cli-0.2.0.dist-info/RECORD,,

tokenmeter_cli-0.2.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

tokenmeter_cli-0.2.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+tokenmeter = tokenmeter.cli:entrypoint

tokenmeter_cli-0.2.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 José del Río
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.