tokenhelm 0.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenhelm/__init__.py +82 -0
- tokenhelm/adapters/__init__.py +20 -0
- tokenhelm/adapters/anthropic.py +113 -0
- tokenhelm/adapters/base.py +73 -0
- tokenhelm/adapters/gemini.py +92 -0
- tokenhelm/adapters/ollama.py +69 -0
- tokenhelm/adapters/openai.py +81 -0
- tokenhelm/core/__init__.py +1 -0
- tokenhelm/core/calculator.py +61 -0
- tokenhelm/core/config.py +14 -0
- tokenhelm/core/errors.py +24 -0
- tokenhelm/core/extraction.py +48 -0
- tokenhelm/core/models.py +145 -0
- tokenhelm/core/tracker.py +153 -0
- tokenhelm/data/pricing.yaml +39 -0
- tokenhelm/dispatch/__init__.py +1 -0
- tokenhelm/dispatch/base.py +19 -0
- tokenhelm/dispatch/default.py +75 -0
- tokenhelm/logging/__init__.py +1 -0
- tokenhelm/logging/base.py +20 -0
- tokenhelm/logging/console.py +30 -0
- tokenhelm/logging/file.py +30 -0
- tokenhelm/logging/json.py +29 -0
- tokenhelm/pricing/__init__.py +1 -0
- tokenhelm/pricing/base.py +20 -0
- tokenhelm/pricing/yaml_provider.py +73 -0
- tokenhelm/py.typed +1 -0
- tokenhelm/sdk/__init__.py +1 -0
- tokenhelm/sdk/client.py +153 -0
- tokenhelm/sdk/context.py +205 -0
- tokenhelm/storage/__init__.py +1 -0
- tokenhelm/storage/base.py +25 -0
- tokenhelm/storage/memory.py +32 -0
- tokenhelm-0.1.0rc1.dist-info/METADATA +260 -0
- tokenhelm-0.1.0rc1.dist-info/RECORD +38 -0
- tokenhelm-0.1.0rc1.dist-info/WHEEL +5 -0
- tokenhelm-0.1.0rc1.dist-info/licenses/LICENSE +21 -0
- tokenhelm-0.1.0rc1.dist-info/top_level.txt +1 -0
tokenhelm/__init__.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""TokenHelm — lightweight, framework-agnostic LLM token tracking and cost calculation.
|
|
2
|
+
|
|
3
|
+
Public surface per ``specs/001-core-sdk/contracts/public-api.md``. Names exported here are the
|
|
4
|
+
v0.x stable contract (Principle X). Beyond the OpenAI vertical from User Story 1, this release
|
|
5
|
+
also exports the Anthropic, Gemini, and Ollama adapters plus loggers, storage, and streaming.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
# Errors
|
|
11
|
+
# Extension-point interfaces (Principle VI)
|
|
12
|
+
from .adapters.anthropic import AnthropicAdapter
|
|
13
|
+
from .adapters.base import BaseAdapter, StreamAggregator
|
|
14
|
+
from .adapters.gemini import GeminiAdapter
|
|
15
|
+
from .adapters.ollama import OllamaAdapter
|
|
16
|
+
from .adapters.openai import OpenAIAdapter
|
|
17
|
+
from .core.calculator import CostCalculator
|
|
18
|
+
from .core.errors import ProviderNotInstalledError, TokenHelmError
|
|
19
|
+
|
|
20
|
+
# Core data model
|
|
21
|
+
from .core.models import (
|
|
22
|
+
LLMCost,
|
|
23
|
+
LLMEvent,
|
|
24
|
+
LLMProvider,
|
|
25
|
+
LLMRequest,
|
|
26
|
+
LLMUsage,
|
|
27
|
+
RateEntry,
|
|
28
|
+
)
|
|
29
|
+
from .dispatch.base import EventDispatcher
|
|
30
|
+
from .dispatch.default import DefaultEventDispatcher
|
|
31
|
+
from .logging.base import Logger
|
|
32
|
+
from .logging.console import ConsoleLogger
|
|
33
|
+
from .logging.file import FileLogger
|
|
34
|
+
from .logging.json import JSONLogger
|
|
35
|
+
from .pricing.base import PricingProvider
|
|
36
|
+
from .pricing.yaml_provider import YamlPricingProvider
|
|
37
|
+
|
|
38
|
+
# Client + scope
|
|
39
|
+
from .sdk.client import TokenHelm
|
|
40
|
+
from .sdk.context import StreamSession, TraceScope
|
|
41
|
+
from .storage.base import StorageBackend
|
|
42
|
+
from .storage.memory import InMemoryStorageBackend
|
|
43
|
+
|
|
44
|
+
__version__ = "0.1.0rc1" # x-release-please-version
|
|
45
|
+
|
|
46
|
+
__all__ = [
|
|
47
|
+
"__version__",
|
|
48
|
+
# client
|
|
49
|
+
"TokenHelm",
|
|
50
|
+
"TraceScope",
|
|
51
|
+
"StreamSession",
|
|
52
|
+
# event + enum
|
|
53
|
+
"LLMEvent",
|
|
54
|
+
"LLMProvider",
|
|
55
|
+
"LLMUsage",
|
|
56
|
+
"LLMCost",
|
|
57
|
+
"LLMRequest",
|
|
58
|
+
"RateEntry",
|
|
59
|
+
# interfaces
|
|
60
|
+
"BaseAdapter",
|
|
61
|
+
"StreamAggregator",
|
|
62
|
+
"PricingProvider",
|
|
63
|
+
"EventDispatcher",
|
|
64
|
+
"Logger",
|
|
65
|
+
"StorageBackend",
|
|
66
|
+
# built-in adapters
|
|
67
|
+
"OpenAIAdapter",
|
|
68
|
+
"AnthropicAdapter",
|
|
69
|
+
"GeminiAdapter",
|
|
70
|
+
"OllamaAdapter",
|
|
71
|
+
# default implementations
|
|
72
|
+
"YamlPricingProvider",
|
|
73
|
+
"DefaultEventDispatcher",
|
|
74
|
+
"ConsoleLogger",
|
|
75
|
+
"JSONLogger",
|
|
76
|
+
"FileLogger",
|
|
77
|
+
"InMemoryStorageBackend",
|
|
78
|
+
"CostCalculator",
|
|
79
|
+
# errors
|
|
80
|
+
"TokenHelmError",
|
|
81
|
+
"ProviderNotInstalledError",
|
|
82
|
+
]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Provider adapters (extension point #1).
|
|
2
|
+
|
|
3
|
+
``default_adapters()`` is the built-in adapter set the client registers by default. It lives
|
|
4
|
+
here (not in ``core``) so the core stays decoupled from concrete adapter implementations —
|
|
5
|
+
``core.extraction.UsageParser`` depends only on :class:`BaseAdapter`.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from .base import BaseAdapter
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def default_adapters() -> list[BaseAdapter]:
    """Return a fresh instance of each built-in adapter, in resolution order.

    The concrete adapter modules are imported inside the call rather than at package
    import time. Order of the result: OpenAI, Anthropic, Gemini, Ollama.
    """
    from .anthropic import AnthropicAdapter
    from .gemini import GeminiAdapter
    from .ollama import OllamaAdapter
    from .openai import OpenAIAdapter

    builtin_types = (OpenAIAdapter, AnthropicAdapter, GeminiAdapter, OllamaAdapter)
    return [adapter_type() for adapter_type in builtin_types]
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""AnthropicAdapter — normalize Anthropic Messages responses (T034).
|
|
2
|
+
|
|
3
|
+
Observe-don't-patch: reads attributes off the ``Message`` object the developer's own
|
|
4
|
+
``anthropic`` client returns. Anthropic reports usage under ``usage.input_tokens`` /
|
|
5
|
+
``output_tokens`` and does **not** supply a combined total (LLMUsage derives it). Prompt-cache
|
|
6
|
+
counts (``cache_creation_input_tokens`` / ``cache_read_input_tokens``) are preserved in
|
|
7
|
+
``LLMUsage.extra`` rather than folded into the core schema.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from ..core.models import LLMProvider, LLMUsage
|
|
13
|
+
from .base import BaseAdapter, StreamAggregator
|
|
14
|
+
|
|
15
|
+
_CACHE_FIELDS = ("cache_creation_input_tokens", "cache_read_input_tokens")
|
|
16
|
+
|
|
17
|
+
_STREAM_EVENT_TYPES = frozenset(
|
|
18
|
+
{
|
|
19
|
+
"message_start",
|
|
20
|
+
"content_block_start",
|
|
21
|
+
"content_block_delta",
|
|
22
|
+
"content_block_stop",
|
|
23
|
+
"message_delta",
|
|
24
|
+
"message_stop",
|
|
25
|
+
"ping",
|
|
26
|
+
}
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class AnthropicAdapter(BaseAdapter):
    """Adapter that recognizes and normalizes Anthropic ``Message``-shaped responses."""

    @property
    def provider(self) -> LLMProvider:
        """Provider tag reported by this adapter (``LLMProvider.ANTHROPIC``)."""
        return LLMProvider.ANTHROPIC

    def identify(self, response: object) -> bool:
        """Return True when ``response`` looks like an Anthropic message.

        A response matches when its ``type`` equals ``"message"``, or when it has a
        non-None ``usage`` exposing ``input_tokens`` and itself has a ``stop_reason``.
        """
        if getattr(response, "type", None) == "message":
            return True
        usage_obj = getattr(response, "usage", None)
        if usage_obj is None:
            return False
        return hasattr(usage_obj, "input_tokens") and hasattr(response, "stop_reason")

    def extract_model(self, response: object) -> str:
        """Return the response's ``model`` as a string, or ``""`` when absent/falsy."""
        model_id = getattr(response, "model", "")
        return str(model_id) if model_id else ""

    def extract_usage(self, response: object) -> LLMUsage:
        """Build an :class:`LLMUsage` from ``response.usage``.

        A missing ``usage`` yields an empty ``LLMUsage()``. No ``total_tokens`` is passed;
        cache counters found on the usage object are forwarded through ``extra``.
        """
        usage_obj = getattr(response, "usage", None)
        if usage_obj is None:
            return LLMUsage()
        return LLMUsage(
            input_tokens=getattr(usage_obj, "input_tokens", None),
            output_tokens=getattr(usage_obj, "output_tokens", None),
            extra=_cache_extra(usage_obj),
        )

    def identify_stream(self, chunk: object) -> bool:
        """Return True when ``chunk.type`` is one of the known stream event types."""
        event_type = getattr(chunk, "type", None)
        return event_type in _STREAM_EVENT_TYPES

    def new_stream_aggregator(self) -> StreamAggregator:
        """Create the aggregator used for a single Anthropic stream."""
        return _AnthropicStreamAggregator()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _cache_extra(usage: object) -> dict[str, int]:
    """Collect the prompt-cache counters present on ``usage``.

    Looks up every name in ``_CACHE_FIELDS`` and keeps the ones whose value is not
    ``None``, preserving the order of ``_CACHE_FIELDS``.
    """
    looked_up = ((field, getattr(usage, field, None)) for field in _CACHE_FIELDS)
    return {field: count for field, count in looked_up if count is not None}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class _AnthropicStreamAggregator(StreamAggregator):
    """Aggregate usage across the events of one Anthropic message stream.

    ``message_start`` carries ``message.model`` and ``message.usage`` (input tokens, an
    initial output count, and any cache counters). Later ``message_delta`` events carry
    a ``usage`` object whose ``output_tokens`` is the running output count; that usage
    may also repeat or update ``input_tokens`` and the cache counters, so any of those
    fields that are present (not ``None``) replace the values seen earlier.

    All other event types are ignored. Counts never seen stay ``None``.
    """

    def __init__(self) -> None:
        self._model = ""
        self._input: int | None = None
        self._output: int | None = None
        self._extra: dict[str, int] = {}

    def consume(self, chunk: object) -> None:
        """Fold one stream event into the running totals; never raises on missing usage."""
        ctype = getattr(chunk, "type", None)
        if ctype == "message_start":
            message = getattr(chunk, "message", None)
            if message is None:
                return
            self._model = str(getattr(message, "model", "") or "")
            usage = getattr(message, "usage", None)
            if usage is not None:
                self._input = getattr(usage, "input_tokens", None)
                self._output = getattr(usage, "output_tokens", None)
                self._extra = _cache_extra(usage)
        elif ctype == "message_delta":
            usage = getattr(chunk, "usage", None)
            if usage is None:
                return
            output = getattr(usage, "output_tokens", None)
            if output is not None:
                self._output = output
            # A delta may restate the input side; prefer the newer figure when given.
            input_tokens = getattr(usage, "input_tokens", None)
            if input_tokens is not None:
                self._input = input_tokens
            # Build a new dict so an LLMUsage handed out earlier is not mutated.
            self._extra = {**self._extra, **_cache_extra(usage)}

    def model(self) -> str:
        """Model id read from ``message_start`` (``""`` if none was seen)."""
        return self._model

    def usage(self) -> LLMUsage:
        """Usage assembled from the events consumed so far."""
        return LLMUsage(input_tokens=self._input, output_tokens=self._output, extra=self._extra)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Extension point #1 — provider adapter contract (T006, T050).
|
|
2
|
+
|
|
3
|
+
An adapter *observes* a response object the developer's own client returned; it never patches
|
|
4
|
+
or wraps the provider SDK. Adapters identify a response by its shape (duck typing) so the core
|
|
5
|
+
needs no provider SDK installed to normalize a response.
|
|
6
|
+
|
|
7
|
+
Streaming (US4): each adapter owns its provider-specific chunk-aggregation logic behind a
|
|
8
|
+
:class:`StreamAggregator`. The core tracker never understands streaming payloads — it only asks
|
|
9
|
+
the aggregator for a final :class:`LLMUsage` and model. ``identify_stream`` recognizes a
|
|
10
|
+
streaming *chunk* (whose shape may differ from a full response).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import abc
|
|
16
|
+
|
|
17
|
+
from ..core.models import LLMProvider, LLMUsage
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class StreamAggregator(abc.ABC):
    """Turns the chunks of a single provider stream into one normalized usage + model.

    Lifecycle: create one instance per stream, feed it every chunk in arrival order via
    ``consume``, then read ``model`` and ``usage`` once when the stream is finalized.
    A stream may end early; implementations must then report what they saw and leave
    unseen counts as ``None`` (so the event flags ``usage_complete=False``).
    """

    @abc.abstractmethod
    def consume(self, chunk: object) -> None:
        """Take in the next chunk; a chunk without usage data must not cause an error."""

    @abc.abstractmethod
    def model(self) -> str:
        """Return the best model identifier known from the chunks consumed so far."""

    @abc.abstractmethod
    def usage(self) -> LLMUsage:
        """Return the usage accumulated so far (final once the stream is exhausted)."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class BaseAdapter(abc.ABC):
    """Interface implemented by every provider adapter."""

    @property
    @abc.abstractmethod
    def provider(self) -> LLMProvider:
        """The provider whose responses this adapter understands."""

    @abc.abstractmethod
    def identify(self, response: object) -> bool:
        """Return True when this adapter can parse the completed (non-streaming) ``response``."""

    @abc.abstractmethod
    def extract_usage(self, response: object) -> LLMUsage:
        """Return the response's token usage normalized as an :class:`LLMUsage`."""

    @abc.abstractmethod
    def extract_model(self, response: object) -> str:
        """Return the model identifier the response reports."""

    # -- streaming hooks: optional, adapters override as needed ------------------------

    def identify_stream(self, chunk: object) -> bool:
        """Return True when this adapter recognizes a streaming *chunk*.

        The default simply delegates to ``identify``; adapters whose chunk shape differs
        from their full-response shape should override it.
        """
        recognized = self.identify(chunk)
        return recognized

    def new_stream_aggregator(self) -> StreamAggregator:
        """Create a :class:`StreamAggregator` for one stream.

        Raises:
            NotImplementedError: always, in this base implementation (streaming is opt-in).
        """
        adapter_name = type(self).__name__
        raise NotImplementedError(f"{adapter_name} does not support streaming aggregation.")
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""GeminiAdapter — normalize Google Gemini responses (T033).
|
|
2
|
+
|
|
3
|
+
Observe-don't-patch: reads attributes off the ``GenerateContentResponse`` object the
|
|
4
|
+
developer's own ``google-genai`` client returns. Gemini reports usage under
|
|
5
|
+
``usage_metadata`` (``prompt_token_count`` / ``candidates_token_count`` / ``total_token_count``)
|
|
6
|
+
and the resolved model under ``model_version``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from ..core.models import LLMProvider, LLMUsage
|
|
12
|
+
from .base import BaseAdapter, StreamAggregator
|
|
13
|
+
|
|
14
|
+
_GEMINI_EXTRA_FIELDS = (
|
|
15
|
+
"cached_content_token_count",
|
|
16
|
+
"thoughts_token_count",
|
|
17
|
+
"tool_use_prompt_token_count",
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _model_of(response: object) -> str:
    """Return ``model_version`` if truthy, else ``model`` if truthy, else ``""``."""
    for attr in ("model_version", "model"):
        candidate = getattr(response, attr, None)
        if candidate:
            return str(candidate)
    return ""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _usage_from_metadata(meta: object) -> LLMUsage:
    """Convert a ``usage_metadata`` object into an :class:`LLMUsage`.

    ``prompt_token_count`` / ``candidates_token_count`` / ``total_token_count`` map to
    input / output / total. Any non-None field named in ``_GEMINI_EXTRA_FIELDS`` is
    passed through ``extra`` instead of the core counts.
    """
    looked_up = ((field, getattr(meta, field, None)) for field in _GEMINI_EXTRA_FIELDS)
    side_counts: dict[str, int] = {
        field: count for field, count in looked_up if count is not None
    }
    return LLMUsage(
        input_tokens=getattr(meta, "prompt_token_count", None),
        output_tokens=getattr(meta, "candidates_token_count", None),
        total_tokens=getattr(meta, "total_token_count", None),
        extra=side_counts,
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class GeminiAdapter(BaseAdapter):
    """Adapter that recognizes and normalizes Gemini ``usage_metadata``-bearing responses."""

    @property
    def provider(self) -> LLMProvider:
        """Provider tag reported by this adapter (``LLMProvider.GEMINI``)."""
        return LLMProvider.GEMINI

    def identify(self, response: object) -> bool:
        """Return True when ``response.usage_metadata`` exposes a Gemini token-count field.

        The metadata must be non-None and have ``prompt_token_count`` or
        ``candidates_token_count``.
        """
        meta = getattr(response, "usage_metadata", None)
        if meta is None:
            return False
        marker_fields = ("prompt_token_count", "candidates_token_count")
        return any(hasattr(meta, field) for field in marker_fields)

    def extract_model(self, response: object) -> str:
        """Return the model id via ``_model_of`` (``model_version`` first, then ``model``)."""
        return _model_of(response)

    def extract_usage(self, response: object) -> LLMUsage:
        """Return normalized usage, or an empty ``LLMUsage()`` without ``usage_metadata``."""
        meta = getattr(response, "usage_metadata", None)
        return LLMUsage() if meta is None else _usage_from_metadata(meta)

    def identify_stream(self, chunk: object) -> bool:
        """Return True for chunks that pass ``identify`` or merely have ``candidates``.

        The looser ``candidates`` check covers early chunks that arrive before
        ``usage_metadata`` is populated.
        """
        if self.identify(chunk):
            return True
        return hasattr(chunk, "candidates")

    def new_stream_aggregator(self) -> StreamAggregator:
        """Create the aggregator used for a single Gemini stream."""
        return _GeminiStreamAggregator()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class _GeminiStreamAggregator(StreamAggregator):
    """Gemini stream aggregation: last value wins.

    Every chunk that carries a model id or a ``usage_metadata`` replaces what was stored
    before, so the values reported at the end come from the latest chunk that had them.
    """

    def __init__(self) -> None:
        self._model = ""
        self._usage = LLMUsage()

    def consume(self, chunk: object) -> None:
        """Remember the chunk's model id and usage metadata when they are present."""
        latest_model = _model_of(chunk)
        if latest_model:
            self._model = latest_model
        meta = getattr(chunk, "usage_metadata", None)
        if meta is None:
            return
        self._usage = _usage_from_metadata(meta)

    def model(self) -> str:
        """Most recent non-empty model id (``""`` if none was seen)."""
        return self._model

    def usage(self) -> LLMUsage:
        """Usage built from the most recent ``usage_metadata`` (empty if none was seen)."""
        return self._usage
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""OllamaAdapter — normalize Ollama (local) responses (T035).
|
|
2
|
+
|
|
3
|
+
Observe-don't-patch: reads attributes off the response object the developer's own ``ollama``
|
|
4
|
+
client returns. Ollama reports usage as ``prompt_eval_count`` (input) and ``eval_count``
|
|
5
|
+
(output); there is no combined total (LLMUsage derives it). Local inference is typically
|
|
6
|
+
zero-cost — unlisted models simply resolve as "unpriced" (cost 0) without error.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from ..core.models import LLMProvider, LLMUsage
|
|
12
|
+
from .base import BaseAdapter, StreamAggregator
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _usage_of(response: object) -> LLMUsage:
    """Map ``prompt_eval_count`` / ``eval_count`` to input / output tokens.

    No ``total_tokens`` is supplied; absent attributes become ``None``.
    """
    prompt_count = getattr(response, "prompt_eval_count", None)
    generated_count = getattr(response, "eval_count", None)
    return LLMUsage(input_tokens=prompt_count, output_tokens=generated_count)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class OllamaAdapter(BaseAdapter):
    """Adapter that recognizes and normalizes Ollama responses."""

    @property
    def provider(self) -> LLMProvider:
        """Provider tag reported by this adapter (``LLMProvider.OLLAMA``)."""
        return LLMProvider.OLLAMA

    def identify(self, response: object) -> bool:
        """Return True when ``response`` has ``eval_count`` or ``prompt_eval_count``."""
        marker_fields = ("eval_count", "prompt_eval_count")
        return any(hasattr(response, field) for field in marker_fields)

    def extract_model(self, response: object) -> str:
        """Return the response's ``model`` as a string, or ``""`` when absent/falsy."""
        model_id = getattr(response, "model", "")
        return str(model_id) if model_id else ""

    def extract_usage(self, response: object) -> LLMUsage:
        """Return usage normalized by ``_usage_of``."""
        return _usage_of(response)

    def identify_stream(self, chunk: object) -> bool:
        """Return True for chunks that pass ``identify`` or have both ``done`` and ``model``.

        The second form covers intermediate chunks that do not yet carry eval counts.
        """
        if self.identify(chunk):
            return True
        return hasattr(chunk, "done") and hasattr(chunk, "model")

    def new_stream_aggregator(self) -> StreamAggregator:
        """Create the aggregator used for a single Ollama stream."""
        return _OllamaStreamAggregator()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class _OllamaStreamAggregator(StreamAggregator):
    """Ollama stream aggregation.

    Keeps the latest truthy ``model`` and the latest non-None ``prompt_eval_count`` /
    ``eval_count`` seen on any chunk (the final ``done`` chunk is where they appear).
    """

    def __init__(self) -> None:
        self._model = ""
        self._input: int | None = None
        self._output: int | None = None

    def consume(self, chunk: object) -> None:
        """Record the chunk's model and whichever eval counts it carries."""
        model_id = getattr(chunk, "model", None)
        if model_id:
            self._model = str(model_id)
        # (attribute on the chunk, attribute on this aggregator)
        count_slots = (("prompt_eval_count", "_input"), ("eval_count", "_output"))
        for chunk_attr, own_attr in count_slots:
            count = getattr(chunk, chunk_attr, None)
            if count is not None:
                setattr(self, own_attr, count)

    def model(self) -> str:
        """Most recent truthy model id (``""`` if none was seen)."""
        return self._model

    def usage(self) -> LLMUsage:
        """Usage assembled from the counts seen so far."""
        return LLMUsage(input_tokens=self._input, output_tokens=self._output)
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""OpenAIAdapter — normalize OpenAI responses (T024).
|
|
2
|
+
|
|
3
|
+
Observe-don't-patch: this reads attributes off whatever response object the developer's own
|
|
4
|
+
OpenAI client returned. It identifies responses by shape (duck typing), so the core does not
|
|
5
|
+
need the ``openai`` package installed to normalize a response. Supports both the Chat
|
|
6
|
+
Completions shape (``usage.prompt_tokens`` / ``completion_tokens``) and the Responses API
|
|
7
|
+
shape (``usage.input_tokens`` / ``output_tokens``).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from ..core.models import LLMProvider, LLMUsage
|
|
13
|
+
from .base import BaseAdapter, StreamAggregator
|
|
14
|
+
|
|
15
|
+
# OpenAI response objects carry an ``object`` discriminator, e.g. "chat.completion",
|
|
16
|
+
# "response", or "text_completion". We use it (when present) to disambiguate from other
|
|
17
|
+
# providers that also expose input/output token counts (notably Anthropic).
|
|
18
|
+
_OPENAI_OBJECT_PREFIXES = ("chat.completion", "response", "text_completion")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class OpenAIAdapter(BaseAdapter):
    """Adapter that recognizes and normalizes OpenAI responses."""

    @property
    def provider(self) -> LLMProvider:
        """Provider tag reported by this adapter (``LLMProvider.OPENAI``)."""
        return LLMProvider.OPENAI

    def identify(self, response: object) -> bool:
        """Return True when ``response`` looks like an OpenAI response.

        Matches on a string ``object`` discriminator starting with one of
        ``_OPENAI_OBJECT_PREFIXES``; otherwise on a non-None ``usage`` that has the
        Chat Completions field ``prompt_tokens``.
        """
        discriminator = getattr(response, "object", None)
        if isinstance(discriminator, str) and discriminator.startswith(_OPENAI_OBJECT_PREFIXES):
            return True
        usage_obj = getattr(response, "usage", None)
        if usage_obj is None:
            return False
        return hasattr(usage_obj, "prompt_tokens")

    def extract_model(self, response: object) -> str:
        """Return the response's ``model`` as a string, or ``""`` when absent/falsy."""
        model_id = getattr(response, "model", "")
        return str(model_id) if model_id else ""

    def extract_usage(self, response: object) -> LLMUsage:
        """Return normalized usage, or an empty ``LLMUsage()`` when ``usage`` is missing."""
        usage_obj = getattr(response, "usage", None)
        return LLMUsage() if usage_obj is None else _usage_from_openai(usage_obj)

    def new_stream_aggregator(self) -> StreamAggregator:
        """Create the aggregator used for a single OpenAI stream."""
        return _OpenAIStreamAggregator()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _usage_from_openai(usage: object) -> LLMUsage:
    """Build an :class:`LLMUsage` from an OpenAI ``usage`` object.

    Chat Completions names (``prompt_tokens`` / ``completion_tokens``) are read first.
    Only when both are ``None`` are the Responses API names (``input_tokens`` /
    ``output_tokens``) used instead. ``total_tokens`` is read as-is.
    """
    counts = (
        getattr(usage, "prompt_tokens", None),
        getattr(usage, "completion_tokens", None),
    )
    if all(count is None for count in counts):
        counts = (
            getattr(usage, "input_tokens", None),
            getattr(usage, "output_tokens", None),
        )
    prompt_side, completion_side = counts
    return LLMUsage(
        input_tokens=prompt_side,
        output_tokens=completion_side,
        total_tokens=getattr(usage, "total_tokens", None),
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class _OpenAIStreamAggregator(StreamAggregator):
    """OpenAI stream aggregation: last value wins.

    Keeps the latest truthy ``model`` and the usage from the latest chunk whose
    ``usage`` is not ``None`` (the late chunk sent with ``include_usage``).
    """

    def __init__(self) -> None:
        self._model = ""
        self._usage = LLMUsage()

    def consume(self, chunk: object) -> None:
        """Record the chunk's model and, when present, its normalized usage."""
        model_id = getattr(chunk, "model", None)
        if model_id:
            self._model = str(model_id)
        usage_obj = getattr(chunk, "usage", None)
        if usage_obj is None:
            return
        self._usage = _usage_from_openai(usage_obj)

    def model(self) -> str:
        """Most recent truthy model id (``""`` if none was seen)."""
        return self._model

    def usage(self) -> LLMUsage:
        """Usage from the most recent usage-bearing chunk (empty if none was seen)."""
        return self._usage
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core: data model, tracker, extraction, calculator, config, errors."""
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Cost calculation and currency formatting (T021).
|
|
2
|
+
|
|
3
|
+
``CostCalculator`` depends ONLY on the :class:`PricingProvider` interface (Decision 3). A
|
|
4
|
+
``None`` rate lookup yields an unpriced ``LLMCost`` (``priced=False``, zero cost) — never an
|
|
5
|
+
error (FR-013).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from decimal import Decimal
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
from .config import DEFAULT_CURRENCY
|
|
14
|
+
from .models import LLMCost, LLMProvider, LLMUsage
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
17
|
+
from ..pricing.base import PricingProvider
|
|
18
|
+
|
|
19
|
+
_PER_MILLION = Decimal(1_000_000)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CostCalculator:
    """Turns token usage into an :class:`LLMCost` using rates from a pricing source."""

    def __init__(self, pricing: PricingProvider, currency: str = DEFAULT_CURRENCY) -> None:
        self._pricing = pricing
        self.currency = currency

    def compute(self, provider: LLMProvider, model: str, usage: LLMUsage) -> LLMCost:
        """Price ``usage`` for ``provider`` / ``model``.

        When the pricing source returns no rate, the result is an unpriced cost
        (``priced=False``) rather than an error. Otherwise each side is
        ``tokens / _PER_MILLION * rate``; a missing (``None``) token count counts as 0.
        """
        rate = self._pricing.get_rates(provider, model)
        if rate is None:
            return LLMCost(currency=self.currency, priced=False)

        prompt_count = usage.input_tokens or 0
        completion_count = usage.output_tokens or 0
        prompt_cost = Decimal(prompt_count) / _PER_MILLION * rate.input_rate
        completion_cost = Decimal(completion_count) / _PER_MILLION * rate.output_rate
        combined_cost = prompt_cost + completion_cost
        return LLMCost(
            input_cost=prompt_cost,
            output_cost=completion_cost,
            total_cost=combined_cost,
            currency=self.currency,
            priced=True,
        )
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class CurrencyFormatter:
    """Formats money amounts as text for loggers/CLI output."""

    # Currency codes rendered with a leading symbol; any other code is appended as text.
    _SYMBOLS = {"USD": "$", "EUR": "€", "GBP": "£", "JPY": "¥"}

    def __init__(self, currency: str = DEFAULT_CURRENCY, *, places: int = 6) -> None:
        self.currency = currency
        self.places = places

    def format(self, amount: Decimal) -> str:
        """Render ``amount`` with ``places`` decimals.

        Known currencies give ``<symbol><value>``; unknown ones give ``<value> <code>``.
        """
        symbol = self._SYMBOLS.get(self.currency, "")
        rendered = format(amount, f".{self.places}f")
        return f"{symbol}{rendered}" if symbol else f"{rendered} {self.currency}"
|
tokenhelm/core/config.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Configuration constants and helpers (T012).
|
|
2
|
+
|
|
3
|
+
Kept intentionally small for v0.1 — the client holds the live configuration; this module
|
|
4
|
+
provides shared defaults so other modules don't hardcode them.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
DEFAULT_CURRENCY = "USD"
|
|
12
|
+
|
|
13
|
+
#: Bundled pricing file shipped with the package (``tokenhelm/data/pricing.yaml``).
|
|
14
|
+
DEFAULT_PRICING_PATH = Path(__file__).resolve().parent.parent / "data" / "pricing.yaml"
|
tokenhelm/core/errors.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Error types for TokenHelm (T013).
|
|
2
|
+
|
|
3
|
+
Tracking never raises on *missing data* — missing usage or missing pricing are represented
|
|
4
|
+
as flags on the event (FR-013). These exceptions cover structural problems only.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TokenHelmError(Exception):
    """Base error, e.g. no adapter recognized a response object."""


class ProviderNotInstalledError(TokenHelmError):
    """A matching provider adapter needs an optional extra that isn't installed.

    Args:
        provider: Name of the provider whose support is missing.
        extra: Name of the installable extra; falls back to ``provider`` when omitted
            or empty.

    Attributes:
        provider: The provider name as given.
        extra: The resolved extra name used in the install hint.
    """

    def __init__(self, provider: str, extra: str | None = None) -> None:
        extra = extra or provider
        super().__init__(
            f"The '{provider}' provider requires the optional extra. "
            f'Install it with: pip install "tokenhelm[{extra}]"'
        )
        self.provider = provider
        self.extra = extra

    def __reduce__(self):
        """Rebuild from ``(provider, extra)`` when pickled or copied.

        The default exception reduction replays ``args`` (the formatted message) into
        ``__init__`` as ``provider``, which wraps the message a second time. Passing the
        original constructor arguments keeps the message intact across pickle/copy.
        """
        return (type(self), (self.provider, self.extra), dict(vars(self)))
|