tollgateai 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# v0 sandbox internal files
|
|
2
|
+
__v0_runtime_loader.js
|
|
3
|
+
__v0_devtools.tsx
|
|
4
|
+
__v0_jsx-dev-runtime.ts
|
|
5
|
+
.snowflake/
|
|
6
|
+
.v0-trash/
|
|
7
|
+
.vercel/
|
|
8
|
+
|
|
9
|
+
# Environment variables
|
|
10
|
+
.env*.local
|
|
11
|
+
|
|
12
|
+
# Common ignores
|
|
13
|
+
node_modules
|
|
14
|
+
.next/
|
|
15
|
+
.DS_Store
|
|
16
|
+
|
|
17
|
+
# Lambda build artifacts
|
|
18
|
+
dist/
|
|
19
|
+
# Claude Code runtime artifacts
|
|
20
|
+
.claude/
|
|
21
|
+
|
|
22
|
+
.vercel
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tollgateai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Track real LLM model usage and compute live gross margin with Tollgate.
|
|
5
|
+
Project-URL: Homepage, https://tollgateapp.vercel.app
|
|
6
|
+
Author: Tollgate
|
|
7
|
+
License: Proprietary
|
|
8
|
+
Keywords: anthropic,cost,llm,margin,observability,openai,tokens,tollgate
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
12
|
+
Requires-Python: >=3.8
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# tollgateai (Python SDK)
|
|
16
|
+
|
|
17
|
+
Track **real** LLM model usage and compute live gross margin with
|
|
18
|
+
[Tollgate](https://tollgateapp.vercel.app). The SDK reads the actual usage off
|
|
19
|
+
each provider response — you never hand-count tokens. Zero dependencies.
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install tollgateai
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Create an API key in **Tollgate → Integrations**, then set:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
export TOLLGATE_API_KEY=tg_live_xxx
|
|
29
|
+
# optional, defaults to the hosted app:
|
|
30
|
+
export TOLLGATE_BASE_URL=https://tollgateapp.vercel.app
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Auto-instrumentation (recommended)
|
|
34
|
+
|
|
35
|
+
Wrap your provider client once; every call reports real usage in the background.
|
|
36
|
+
|
|
37
|
+
### Anthropic
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from anthropic import Anthropic
|
|
41
|
+
from tollgate import create_tollgate_client, wrap_anthropic
|
|
42
|
+
|
|
43
|
+
tollgate = create_tollgate_client() # reads TOLLGATE_API_KEY
|
|
44
|
+
anthropic = wrap_anthropic(
|
|
45
|
+
Anthropic(), tollgate,
|
|
46
|
+
customer_id="cust_A", # your end customer
|
|
47
|
+
revenue_unit_cents=50, # what you charge for this unit ($0.50)
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# Use the client normally — usage is tracked automatically.
|
|
51
|
+
anthropic.messages.create(
|
|
52
|
+
model="claude-sonnet-4-6",
|
|
53
|
+
max_tokens=512,
|
|
54
|
+
messages=[{"role": "user", "content": "Summarize this ticket…"}],
|
|
55
|
+
)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### OpenAI
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from openai import OpenAI
|
|
62
|
+
from tollgate import create_tollgate_client, wrap_openai
|
|
63
|
+
|
|
64
|
+
tollgate = create_tollgate_client()
|
|
65
|
+
openai = wrap_openai(OpenAI(), tollgate, customer_id="cust_A")
|
|
66
|
+
|
|
67
|
+
openai.chat.completions.create(
|
|
68
|
+
model="gpt-4o",
|
|
69
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
70
|
+
)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
`revenue_unit_cents` can also be a callable of the response, e.g.
|
|
74
|
+
`revenue_unit_cents=lambda res: 50 if res.something else 0`.
|
|
75
|
+
|
|
76
|
+
## Manual tracking
|
|
77
|
+
|
|
78
|
+
For providers without a wrapper (Bedrock, custom gateways) or full control:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from tollgate import create_tollgate_client
|
|
82
|
+
|
|
83
|
+
tollgate = create_tollgate_client()
|
|
84
|
+
|
|
85
|
+
tollgate.track({
|
|
86
|
+
"customerId": "cust_A",
|
|
87
|
+
"runId": "run_12345",
|
|
88
|
+
"provider": "anthropic",
|
|
89
|
+
"model": "claude-sonnet-4-6",
|
|
90
|
+
"tokensIn": 1200,
|
|
91
|
+
"tokensOut": 450,
|
|
92
|
+
"reasoningTokens": 0,
|
|
93
|
+
"cachedTokens": 0,
|
|
94
|
+
"revenueUnitCents": 50,
|
|
95
|
+
"idempotencyKey": "run_12345#step_1", # exactly-once: safe to retry
|
|
96
|
+
})
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Notes
|
|
100
|
+
|
|
101
|
+
- **Idempotent.** Events dedupe on `idempotencyKey` (auto-set to the provider
|
|
102
|
+
response id by the wrappers), so retries never double-count.
|
|
103
|
+
- **No prompt content is ever sent** — only token counts and metadata.
|
|
104
|
+
- **Streaming** responses are not auto-tracked yet (the wrappers only report when
|
|
105
|
+
a non-streaming `usage` is present). Track those manually for now.
|
|
106
|
+
- **Non-blocking.** Auto-instrumented tracking runs on a background thread;
|
|
107
|
+
failures go to `on_error` (default: log a warning) and never break your call. The thread is a daemon thread, so an event still in flight when the process exits may be lost.
|
|
108
|
+
|
|
109
|
+
Licensed for use with Tollgate. Not open source.
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# tollgateai (Python SDK)
|
|
2
|
+
|
|
3
|
+
Track **real** LLM model usage and compute live gross margin with
|
|
4
|
+
[Tollgate](https://tollgateapp.vercel.app). The SDK reads the actual usage off
|
|
5
|
+
each provider response — you never hand-count tokens. Zero dependencies.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install tollgateai
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Create an API key in **Tollgate → Integrations**, then set:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
export TOLLGATE_API_KEY=tg_live_xxx
|
|
15
|
+
# optional, defaults to the hosted app:
|
|
16
|
+
export TOLLGATE_BASE_URL=https://tollgateapp.vercel.app
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Auto-instrumentation (recommended)
|
|
20
|
+
|
|
21
|
+
Wrap your provider client once; every call reports real usage in the background.
|
|
22
|
+
|
|
23
|
+
### Anthropic
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from anthropic import Anthropic
|
|
27
|
+
from tollgate import create_tollgate_client, wrap_anthropic
|
|
28
|
+
|
|
29
|
+
tollgate = create_tollgate_client() # reads TOLLGATE_API_KEY
|
|
30
|
+
anthropic = wrap_anthropic(
|
|
31
|
+
Anthropic(), tollgate,
|
|
32
|
+
customer_id="cust_A", # your end customer
|
|
33
|
+
revenue_unit_cents=50, # what you charge for this unit ($0.50)
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# Use the client normally — usage is tracked automatically.
|
|
37
|
+
anthropic.messages.create(
|
|
38
|
+
model="claude-sonnet-4-6",
|
|
39
|
+
max_tokens=512,
|
|
40
|
+
messages=[{"role": "user", "content": "Summarize this ticket…"}],
|
|
41
|
+
)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### OpenAI
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from openai import OpenAI
|
|
48
|
+
from tollgate import create_tollgate_client, wrap_openai
|
|
49
|
+
|
|
50
|
+
tollgate = create_tollgate_client()
|
|
51
|
+
openai = wrap_openai(OpenAI(), tollgate, customer_id="cust_A")
|
|
52
|
+
|
|
53
|
+
openai.chat.completions.create(
|
|
54
|
+
model="gpt-4o",
|
|
55
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
56
|
+
)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
`revenue_unit_cents` can also be a callable of the response, e.g.
|
|
60
|
+
`revenue_unit_cents=lambda res: 50 if res.something else 0`.
|
|
61
|
+
|
|
62
|
+
## Manual tracking
|
|
63
|
+
|
|
64
|
+
For providers without a wrapper (Bedrock, custom gateways) or full control:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from tollgate import create_tollgate_client
|
|
68
|
+
|
|
69
|
+
tollgate = create_tollgate_client()
|
|
70
|
+
|
|
71
|
+
tollgate.track({
|
|
72
|
+
"customerId": "cust_A",
|
|
73
|
+
"runId": "run_12345",
|
|
74
|
+
"provider": "anthropic",
|
|
75
|
+
"model": "claude-sonnet-4-6",
|
|
76
|
+
"tokensIn": 1200,
|
|
77
|
+
"tokensOut": 450,
|
|
78
|
+
"reasoningTokens": 0,
|
|
79
|
+
"cachedTokens": 0,
|
|
80
|
+
"revenueUnitCents": 50,
|
|
81
|
+
"idempotencyKey": "run_12345#step_1", # exactly-once: safe to retry
|
|
82
|
+
})
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Notes
|
|
86
|
+
|
|
87
|
+
- **Idempotent.** Events dedupe on `idempotencyKey` (auto-set to the provider
|
|
88
|
+
response id by the wrappers), so retries never double-count.
|
|
89
|
+
- **No prompt content is ever sent** — only token counts and metadata.
|
|
90
|
+
- **Streaming** responses are not auto-tracked yet (the wrappers only report when
|
|
91
|
+
a non-streaming `usage` is present). Track those manually for now.
|
|
92
|
+
- **Non-blocking.** Auto-instrumented tracking runs on a background thread;
|
|
93
|
+
failures go to `on_error` (default: log a warning) and never break your call. The thread is a daemon thread, so an event still in flight when the process exits may be lost.
|
|
94
|
+
|
|
95
|
+
Licensed for use with Tollgate. Not open source.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tollgateai"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Track real LLM model usage and compute live gross margin with Tollgate."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = { text = "Proprietary" }
|
|
12
|
+
authors = [{ name = "Tollgate" }]
|
|
13
|
+
keywords = ["llm", "tokens", "cost", "margin", "observability", "anthropic", "openai", "tollgate"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Topic :: Software Development :: Libraries",
|
|
18
|
+
]
|
|
19
|
+
dependencies = []
|
|
20
|
+
|
|
21
|
+
[project.urls]
|
|
22
|
+
Homepage = "https://tollgateapp.vercel.app"
|
|
23
|
+
|
|
24
|
+
[tool.hatch.build.targets.wheel]
|
|
25
|
+
packages = ["src/tollgate"]
|
|
26
|
+
|
|
27
|
+
[tool.hatch.build.targets.sdist]
|
|
28
|
+
include = ["src/tollgate", "README.md"]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Tollgate Python SDK — track real LLM usage and compute live gross margin."""
|
|
2
|
+
|
|
3
|
+
from .client import (
|
|
4
|
+
TollgateClient,
|
|
5
|
+
TollgateError,
|
|
6
|
+
create_tollgate_client,
|
|
7
|
+
)
|
|
8
|
+
from .instrument import (
|
|
9
|
+
anthropic_event_from,
|
|
10
|
+
openai_event_from,
|
|
11
|
+
wrap_anthropic,
|
|
12
|
+
wrap_openai,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__version__ = "0.1.0"
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"TollgateClient",
|
|
19
|
+
"TollgateError",
|
|
20
|
+
"create_tollgate_client",
|
|
21
|
+
"wrap_anthropic",
|
|
22
|
+
"wrap_openai",
|
|
23
|
+
"anthropic_event_from",
|
|
24
|
+
"openai_event_from",
|
|
25
|
+
"__version__",
|
|
26
|
+
]
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Tollgate ingest client. Zero dependencies — uses urllib from the stdlib."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
import urllib.error
|
|
7
|
+
import urllib.request
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
9
|
+
|
|
10
|
+
DEFAULT_BASE_URL = "https://tollgateapp.vercel.app"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TollgateError(Exception):
    """Error raised when reporting an event to Tollgate fails.

    ``status`` carries the HTTP status code when the raiser supplied one
    (``None`` otherwise) and ``body`` carries whatever response payload the
    raiser attached (``None`` otherwise).
    """

    def __init__(self, message: str, status: Optional[int] = None, body: Any = None):
        super().__init__(message)
        # Extra context for callers that want to branch on the failure.
        self.body = body
        self.status = status
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TollgateClient:
    """Reports usage events to ``POST /api/track``. Idempotent on ``idempotencyKey``."""

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        timeout: float = 10.0,
        max_retries: int = 2,
    ):
        """Store connection settings.

        Args:
            api_key: Bearer token; falls back to the ``TOLLGATE_API_KEY``
                environment variable when not given.
            base_url: Service root; falls back to ``TOLLGATE_BASE_URL`` and
                then ``DEFAULT_BASE_URL``. Trailing slashes are stripped.
            timeout: Timeout in seconds passed to ``urlopen`` for each attempt.
            max_retries: Extra attempts made after the first one when a
                failure is retryable. Values below zero are treated as zero.
        """
        self.api_key = api_key or os.environ.get("TOLLGATE_API_KEY")
        self.base_url = (
            base_url or os.environ.get("TOLLGATE_BASE_URL") or DEFAULT_BASE_URL
        ).rstrip("/")
        self.timeout = timeout
        self.max_retries = max_retries

    def track(self, event: Dict[str, Any]) -> Dict[str, Any]:
        """Send one usage event and return the decoded JSON response.

        HTTP 5xx and 429 responses, and transport-level ``OSError`` failures
        (``URLError``, read timeouts, connection resets), are retried with
        exponential backoff. Any other HTTP error status is raised
        immediately.

        Raises:
            TollgateError: If no API key is configured, or the server answers
                with an error status (``status`` and, for non-retryable
                statuses, the parsed ``body`` are attached).
            OSError: The last transport error once all attempts are used up.
        """
        if not self.api_key:
            raise TollgateError("Missing API key — pass api_key or set TOLLGATE_API_KEY.")

        # Drop None values so server defaults apply.
        payload = json.dumps({k: v for k, v in event.items() if v is not None}).encode("utf-8")
        url = self.base_url + "/api/track"
        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + self.api_key,
        }

        # Always make at least one attempt, even for a negative max_retries;
        # otherwise the event would be reported as failed without being sent.
        attempts = max(self.max_retries, 0) + 1

        last_err: Optional[Exception] = None
        for attempt in range(attempts):
            req = urllib.request.Request(url, data=payload, method="POST", headers=headers)
            try:
                with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                    # 200 (duplicate) and 201 (created) both succeed here.
                    return json.loads(resp.read().decode("utf-8") or "{}")
            except urllib.error.HTTPError as e:
                status = e.code
                if status >= 500 or status == 429:
                    last_err = TollgateError("Tollgate track failed (%d)" % status, status)
                else:
                    body = None
                    try:
                        body = json.loads(e.read().decode("utf-8") or "{}")
                    except Exception:
                        # Best effort: an unreadable or non-JSON error body
                        # must not mask the HTTP status we are about to raise.
                        pass
                    raise TollgateError("Tollgate track failed (%d)" % status, status, body)
            except OSError as e:
                # URLError is an OSError subclass; catching OSError also covers
                # socket timeouts and resets raised while reading the response,
                # which are not wrapped in URLError and are just as transient.
                last_err = e

            if attempt < attempts - 1:
                time.sleep(0.2 * (2 ** attempt))  # 0.2s, 0.4s, …

        raise last_err or TollgateError("Tollgate track failed after retries")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def create_tollgate_client(
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    timeout: float = 10.0,
    max_retries: int = 2,
) -> TollgateClient:
    """Build a ``TollgateClient``, forwarding every argument unchanged."""
    settings = {
        "api_key": api_key,
        "base_url": base_url,
        "timeout": timeout,
        "max_retries": max_retries,
    }
    return TollgateClient(**settings)
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""Auto-instrumentation: wrap a provider client so every completion reports its
|
|
2
|
+
REAL usage to Tollgate — no manual token counting. Structurally typed, so this
|
|
3
|
+
package never has to depend on the provider SDKs."""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import threading
|
|
7
|
+
import uuid
|
|
8
|
+
from typing import Any, Callable, Dict, Optional, Union
|
|
9
|
+
|
|
10
|
+
from .client import TollgateClient
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger("tollgate")
|
|
13
|
+
|
|
14
|
+
Revenue = Union[int, Callable[[Any], Optional[int]], None]
|
|
15
|
+
RunId = Union[str, Callable[[], str], None]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _attr(obj: Any, *path: str, default: Any = None) -> Any:
|
|
19
|
+
"""Read a nested attribute or dict key path, tolerant of either."""
|
|
20
|
+
cur = obj
|
|
21
|
+
for p in path:
|
|
22
|
+
if cur is None:
|
|
23
|
+
return default
|
|
24
|
+
cur = cur.get(p) if isinstance(cur, dict) else getattr(cur, p, None)
|
|
25
|
+
return cur if cur is not None else default
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _resolve_run_id(run_id: RunId, response_id: Optional[str]) -> str:
|
|
29
|
+
if callable(run_id):
|
|
30
|
+
return run_id()
|
|
31
|
+
return run_id or response_id or str(uuid.uuid4())
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _resolve_revenue(revenue: Revenue, response: Any) -> Optional[int]:
|
|
35
|
+
return revenue(response) if callable(revenue) else revenue
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _fire(tollgate: TollgateClient, event: Dict[str, Any], on_error: Optional[Callable[[Exception], None]]) -> None:
|
|
39
|
+
def run() -> None:
|
|
40
|
+
try:
|
|
41
|
+
tollgate.track(event)
|
|
42
|
+
except Exception as err: # noqa: BLE001 - report, never raise into caller
|
|
43
|
+
(on_error or (lambda e: logger.warning("[tollgate] track failed: %s", e)))(err)
|
|
44
|
+
|
|
45
|
+
threading.Thread(target=run, daemon=True).start()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# --- Anthropic ------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
def anthropic_event_from(
    msg: Any,
    customer_id: str,
    agent_id: Optional[str] = None,
    run_id: RunId = None,
    revenue_unit_cents: Revenue = None,
) -> Optional[Dict[str, Any]]:
    """Build a Tollgate event dict from an Anthropic-style response.

    ``msg`` may be an object or a dict; fields are read with ``_attr``.
    Returns ``None`` when ``msg`` has no ``usage``. Missing token counts
    default to 0 and a missing model to ``"unknown"``. The idempotency key is
    the response ``id`` when present, otherwise the resolved run id.
    """
    usage = _attr(msg, "usage")
    if usage is None:
        return None

    message_id = _attr(msg, "id")
    resolved_run = _resolve_run_id(run_id, message_id)

    event: Dict[str, Any] = {
        "customerId": customer_id,
        "agentId": agent_id,
        "runId": resolved_run,
        "provider": "anthropic",
    }
    event["model"] = _attr(msg, "model", default="unknown")
    token_fields = (
        ("tokensIn", "input_tokens"),
        ("tokensOut", "output_tokens"),
        ("cachedTokens", "cache_read_input_tokens"),
    )
    for key, field in token_fields:
        event[key] = _attr(usage, field, default=0)
    event["revenueUnitCents"] = _resolve_revenue(revenue_unit_cents, msg)
    event["idempotencyKey"] = message_id or resolved_run
    return event
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# --- OpenAI ---------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
def openai_event_from(
    completion: Any,
    customer_id: str,
    agent_id: Optional[str] = None,
    run_id: RunId = None,
    revenue_unit_cents: Revenue = None,
) -> Optional[Dict[str, Any]]:
    """Build a Tollgate event dict from an OpenAI-style completion.

    ``completion`` may be an object or a dict; fields are read with
    ``_attr``. Returns ``None`` when it has no ``usage``. Missing token
    counts default to 0 and a missing model to ``"unknown"``. The idempotency
    key is the completion ``id`` when present, otherwise the resolved run id.
    """
    usage = _attr(completion, "usage")
    if usage is None:
        return None

    completion_id = _attr(completion, "id")
    resolved_run = _resolve_run_id(run_id, completion_id)

    event: Dict[str, Any] = {
        "customerId": customer_id,
        "agentId": agent_id,
        "runId": resolved_run,
        "provider": "openai",
    }
    event["model"] = _attr(completion, "model", default="unknown")
    # Event key -> attribute path inside ``usage``.
    token_fields = (
        ("tokensIn", ("prompt_tokens",)),
        ("tokensOut", ("completion_tokens",)),
        ("reasoningTokens", ("completion_tokens_details", "reasoning_tokens")),
        ("cachedTokens", ("prompt_tokens_details", "cached_tokens")),
    )
    for key, path in token_fields:
        event[key] = _attr(usage, *path, default=0)
    event["revenueUnitCents"] = _resolve_revenue(revenue_unit_cents, completion)
    event["idempotencyKey"] = completion_id or resolved_run
    return event
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# --- Proxy plumbing -------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
class _CreateInterceptor:
|
|
106
|
+
"""Wraps an object exposing ``create(...)`` and reports the response."""
|
|
107
|
+
|
|
108
|
+
def __init__(self, inner: Any, hook: Callable[[Any], None]):
|
|
109
|
+
self._inner = inner
|
|
110
|
+
self._hook = hook
|
|
111
|
+
|
|
112
|
+
def create(self, *args: Any, **kwargs: Any) -> Any:
|
|
113
|
+
result = self._inner.create(*args, **kwargs)
|
|
114
|
+
self._hook(result)
|
|
115
|
+
return result
|
|
116
|
+
|
|
117
|
+
def __getattr__(self, name: str) -> Any:
|
|
118
|
+
return getattr(self._inner, name)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class _Proxy:
|
|
122
|
+
"""Forwards everything to ``target`` except the keys in ``overrides``."""
|
|
123
|
+
|
|
124
|
+
def __init__(self, target: Any, overrides: Dict[str, Any]):
|
|
125
|
+
object.__setattr__(self, "_target", target)
|
|
126
|
+
object.__setattr__(self, "_overrides", overrides)
|
|
127
|
+
|
|
128
|
+
def __getattr__(self, name: str) -> Any:
|
|
129
|
+
overrides = object.__getattribute__(self, "_overrides")
|
|
130
|
+
if name in overrides:
|
|
131
|
+
return overrides[name]
|
|
132
|
+
return getattr(object.__getattribute__(self, "_target"), name)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def wrap_anthropic(
    client: Any,
    tollgate: TollgateClient,
    customer_id: str,
    agent_id: Optional[str] = None,
    run_id: RunId = None,
    revenue_unit_cents: Revenue = None,
    on_error: Optional[Callable[[Exception], None]] = None,
) -> Any:
    """Wrap an Anthropic client so ``messages.create`` auto-reports real usage.

    Returns a proxy whose ``messages`` attribute intercepts ``create``; every
    other attribute is forwarded to ``client``. Responses that yield no
    event (no ``usage``) are not reported.
    """

    def report_usage(response: Any) -> None:
        payload = anthropic_event_from(response, customer_id, agent_id, run_id, revenue_unit_cents)
        if not payload:
            return
        _fire(tollgate, payload, on_error)

    overrides = {"messages": _CreateInterceptor(client.messages, report_usage)}
    return _Proxy(client, overrides)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def wrap_openai(
    client: Any,
    tollgate: TollgateClient,
    customer_id: str,
    agent_id: Optional[str] = None,
    run_id: RunId = None,
    revenue_unit_cents: Revenue = None,
    on_error: Optional[Callable[[Exception], None]] = None,
) -> Any:
    """Wrap an OpenAI client so ``chat.completions.create`` auto-reports usage.

    Returns a proxy whose ``chat.completions`` intercepts ``create``; every
    other attribute of ``client`` and ``client.chat`` is forwarded. Responses
    that yield no event (no ``usage``) are not reported.
    """

    def report_usage(response: Any) -> None:
        payload = openai_event_from(response, customer_id, agent_id, run_id, revenue_unit_cents)
        if not payload:
            return
        _fire(tollgate, payload, on_error)

    tracked_completions = _CreateInterceptor(client.chat.completions, report_usage)
    tracked_chat = _Proxy(client.chat, {"completions": tracked_completions})
    return _Proxy(client, {"chat": tracked_chat})
|