tollgateai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ # v0 sandbox internal files
2
+ __v0_runtime_loader.js
3
+ __v0_devtools.tsx
4
+ __v0_jsx-dev-runtime.ts
5
+ .snowflake/
6
+ .v0-trash/
7
+ .vercel/
8
+
9
+ # Environment variables
10
+ .env*.local
11
+
12
+ # Common ignores
13
+ node_modules
14
+ .next/
15
+ .DS_Store
16
+
17
+ # Lambda build artifacts
18
+ dist/
19
+ # Claude Code runtime artifacts
20
+ .claude/
21
+
22
+ .vercel
@@ -0,0 +1,109 @@
1
+ Metadata-Version: 2.4
2
+ Name: tollgateai
3
+ Version: 0.1.0
4
+ Summary: Track real LLM model usage and compute live gross margin with Tollgate.
5
+ Project-URL: Homepage, https://tollgateapp.vercel.app
6
+ Author: Tollgate
7
+ License: Proprietary
8
+ Keywords: anthropic,cost,llm,margin,observability,openai,tokens,tollgate
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Software Development :: Libraries
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown
14
+
15
+ # tollgateai (Python SDK)
16
+
17
+ Track **real** LLM model usage and compute live gross margin with
18
+ [Tollgate](https://tollgateapp.vercel.app). The SDK reads the actual usage off
19
+ each provider response — you never hand-count tokens. Zero dependencies.
20
+
21
+ ```bash
22
+ pip install tollgateai
23
+ ```
24
+
25
+ Create an API key in **Tollgate → Integrations**, then set:
26
+
27
+ ```bash
28
+ export TOLLGATE_API_KEY=tg_live_xxx
29
+ # optional, defaults to the hosted app:
30
+ export TOLLGATE_BASE_URL=https://tollgateapp.vercel.app
31
+ ```
32
+
33
+ ## Auto-instrumentation (recommended)
34
+
35
+ Wrap your provider client once; every call reports real usage in the background.
36
+
37
+ ### Anthropic
38
+
39
+ ```python
40
+ from anthropic import Anthropic
41
+ from tollgate import create_tollgate_client, wrap_anthropic
42
+
43
+ tollgate = create_tollgate_client() # reads TOLLGATE_API_KEY
44
+ anthropic = wrap_anthropic(
45
+ Anthropic(), tollgate,
46
+ customer_id="cust_A", # your end customer
47
+ revenue_unit_cents=50, # what you charge for this unit ($0.50)
48
+ )
49
+
50
+ # Use the client normally — usage is tracked automatically.
51
+ anthropic.messages.create(
52
+ model="claude-sonnet-4-6",
53
+ max_tokens=512,
54
+ messages=[{"role": "user", "content": "Summarize this ticket…"}],
55
+ )
56
+ ```
57
+
58
+ ### OpenAI
59
+
60
+ ```python
61
+ from openai import OpenAI
62
+ from tollgate import create_tollgate_client, wrap_openai
63
+
64
+ tollgate = create_tollgate_client()
65
+ openai = wrap_openai(OpenAI(), tollgate, customer_id="cust_A")
66
+
67
+ openai.chat.completions.create(
68
+ model="gpt-4o",
69
+ messages=[{"role": "user", "content": "Hello"}],
70
+ )
71
+ ```
72
+
73
+ `revenue_unit_cents` can also be a callable of the response, e.g.
74
+ `revenue_unit_cents=lambda res: 50 if res.something else 0`.
75
+
76
+ ## Manual tracking
77
+
78
+ For providers without a wrapper (Bedrock, custom gateways) or full control:
79
+
80
+ ```python
81
+ from tollgate import create_tollgate_client
82
+
83
+ tollgate = create_tollgate_client()
84
+
85
+ tollgate.track({
86
+ "customerId": "cust_A",
87
+ "runId": "run_12345",
88
+ "provider": "anthropic",
89
+ "model": "claude-sonnet-4-6",
90
+ "tokensIn": 1200,
91
+ "tokensOut": 450,
92
+ "reasoningTokens": 0,
93
+ "cachedTokens": 0,
94
+ "revenueUnitCents": 50,
95
+ "idempotencyKey": "run_12345#step_1", # exactly-once: safe to retry
96
+ })
97
+ ```
98
+
99
+ ## Notes
100
+
101
+ - **Idempotent.** Events dedupe on `idempotencyKey` (auto-set to the provider
102
+ response id by the wrappers), so retries never double-count.
103
+ - **No prompt content is ever sent** — only token counts and metadata.
104
+ - **Streaming** responses are not auto-tracked yet (the wrappers only report when
105
+ a non-streaming `usage` is present). Track those manually for now.
106
+ - **Non-blocking.** Auto-instrumented tracking runs on a background thread;
107
+ failures go to `on_error` (default: log a warning) and never break your call.
108
+
109
+ Licensed for use with Tollgate. Not open source.
@@ -0,0 +1,95 @@
1
+ # tollgateai (Python SDK)
2
+
3
+ Track **real** LLM model usage and compute live gross margin with
4
+ [Tollgate](https://tollgateapp.vercel.app). The SDK reads the actual usage off
5
+ each provider response — you never hand-count tokens. Zero dependencies.
6
+
7
+ ```bash
8
+ pip install tollgateai
9
+ ```
10
+
11
+ Create an API key in **Tollgate → Integrations**, then set:
12
+
13
+ ```bash
14
+ export TOLLGATE_API_KEY=tg_live_xxx
15
+ # optional, defaults to the hosted app:
16
+ export TOLLGATE_BASE_URL=https://tollgateapp.vercel.app
17
+ ```
18
+
19
+ ## Auto-instrumentation (recommended)
20
+
21
+ Wrap your provider client once; every call reports real usage in the background.
22
+
23
+ ### Anthropic
24
+
25
+ ```python
26
+ from anthropic import Anthropic
27
+ from tollgate import create_tollgate_client, wrap_anthropic
28
+
29
+ tollgate = create_tollgate_client() # reads TOLLGATE_API_KEY
30
+ anthropic = wrap_anthropic(
31
+ Anthropic(), tollgate,
32
+ customer_id="cust_A", # your end customer
33
+ revenue_unit_cents=50, # what you charge for this unit ($0.50)
34
+ )
35
+
36
+ # Use the client normally — usage is tracked automatically.
37
+ anthropic.messages.create(
38
+ model="claude-sonnet-4-6",
39
+ max_tokens=512,
40
+ messages=[{"role": "user", "content": "Summarize this ticket…"}],
41
+ )
42
+ ```
43
+
44
+ ### OpenAI
45
+
46
+ ```python
47
+ from openai import OpenAI
48
+ from tollgate import create_tollgate_client, wrap_openai
49
+
50
+ tollgate = create_tollgate_client()
51
+ openai = wrap_openai(OpenAI(), tollgate, customer_id="cust_A")
52
+
53
+ openai.chat.completions.create(
54
+ model="gpt-4o",
55
+ messages=[{"role": "user", "content": "Hello"}],
56
+ )
57
+ ```
58
+
59
+ `revenue_unit_cents` can also be a callable of the response, e.g.
60
+ `revenue_unit_cents=lambda res: 50 if res.something else 0`.
61
+
62
+ ## Manual tracking
63
+
64
+ For providers without a wrapper (Bedrock, custom gateways) or full control:
65
+
66
+ ```python
67
+ from tollgate import create_tollgate_client
68
+
69
+ tollgate = create_tollgate_client()
70
+
71
+ tollgate.track({
72
+ "customerId": "cust_A",
73
+ "runId": "run_12345",
74
+ "provider": "anthropic",
75
+ "model": "claude-sonnet-4-6",
76
+ "tokensIn": 1200,
77
+ "tokensOut": 450,
78
+ "reasoningTokens": 0,
79
+ "cachedTokens": 0,
80
+ "revenueUnitCents": 50,
81
+ "idempotencyKey": "run_12345#step_1", # exactly-once: safe to retry
82
+ })
83
+ ```
84
+
85
+ ## Notes
86
+
87
+ - **Idempotent.** Events dedupe on `idempotencyKey` (auto-set to the provider
88
+ response id by the wrappers), so retries never double-count.
89
+ - **No prompt content is ever sent** — only token counts and metadata.
90
+ - **Streaming** responses are not auto-tracked yet (the wrappers only report when
91
+ a non-streaming `usage` is present). Track those manually for now.
92
+ - **Non-blocking.** Auto-instrumented tracking runs on a background thread;
93
+ failures go to `on_error` (default: log a warning) and never break your call.
94
+
95
+ Licensed for use with Tollgate. Not open source.
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "tollgateai"
7
+ version = "0.1.0"
8
+ description = "Track real LLM model usage and compute live gross margin with Tollgate."
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = { text = "Proprietary" }
12
+ authors = [{ name = "Tollgate" }]
13
+ keywords = ["llm", "tokens", "cost", "margin", "observability", "anthropic", "openai", "tollgate"]
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "Intended Audience :: Developers",
17
+ "Topic :: Software Development :: Libraries",
18
+ ]
19
+ dependencies = []
20
+
21
+ [project.urls]
22
+ Homepage = "https://tollgateapp.vercel.app"
23
+
24
+ [tool.hatch.build.targets.wheel]
25
+ packages = ["src/tollgate"]
26
+
27
+ [tool.hatch.build.targets.sdist]
28
+ include = ["src/tollgate", "README.md"]
@@ -0,0 +1,26 @@
1
+ """Tollgate Python SDK — track real LLM usage and compute live gross margin."""
2
+
3
+ from .client import (
4
+ TollgateClient,
5
+ TollgateError,
6
+ create_tollgate_client,
7
+ )
8
+ from .instrument import (
9
+ anthropic_event_from,
10
+ openai_event_from,
11
+ wrap_anthropic,
12
+ wrap_openai,
13
+ )
14
+
15
+ __version__ = "0.1.0"
16
+
17
+ __all__ = [
18
+ "TollgateClient",
19
+ "TollgateError",
20
+ "create_tollgate_client",
21
+ "wrap_anthropic",
22
+ "wrap_openai",
23
+ "anthropic_event_from",
24
+ "openai_event_from",
25
+ "__version__",
26
+ ]
@@ -0,0 +1,82 @@
1
+ """Tollgate ingest client. Zero dependencies — uses urllib from the stdlib."""
2
+
3
+ import json
4
+ import os
5
+ import time
6
+ import urllib.error
7
+ import urllib.request
8
+ from typing import Any, Dict, Optional
9
+
10
+ DEFAULT_BASE_URL = "https://tollgateapp.vercel.app"
11
+
12
+
13
class TollgateError(Exception):
    """Error raised when reporting an event to Tollgate fails.

    The exception message is ``message``. Two extra fields are stored on the
    instance: ``status`` (the HTTP status code, when one is known) and
    ``body`` (the decoded error payload, when one is available). Both
    default to ``None``.
    """

    def __init__(self, message: str, status: Optional[int] = None, body: Any = None):
        Exception.__init__(self, message)
        self.body = body
        self.status = status
18
+
19
+
20
class TollgateClient:
    """Reports usage events to ``POST /api/track``. Idempotent on ``idempotencyKey``.

    Args:
        api_key: Bearer token. Falls back to the ``TOLLGATE_API_KEY``
            environment variable when not given.
        base_url: Service root. Falls back to ``TOLLGATE_BASE_URL`` and then
            to ``DEFAULT_BASE_URL``; any trailing ``/`` is stripped.
        timeout: Per-request timeout in seconds, passed to ``urlopen``.
        max_retries: Number of *additional* attempts after the first one for
            retryable failures (HTTP 5xx/429 and transport errors).
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        timeout: float = 10.0,
        max_retries: int = 2,
    ):
        self.api_key = api_key or os.environ.get("TOLLGATE_API_KEY")
        self.base_url = (
            base_url or os.environ.get("TOLLGATE_BASE_URL") or DEFAULT_BASE_URL
        ).rstrip("/")
        self.timeout = timeout
        self.max_retries = max_retries

    @staticmethod
    def _error_body(err: urllib.error.HTTPError) -> Any:
        """Decode the JSON body of an HTTP error (best effort) and close it.

        Returns ``None`` when the body cannot be read or is not valid JSON;
        the body is diagnostics only, so a failure here must not mask the
        HTTP error itself.
        """
        try:
            # HTTPError wraps the response; ``with`` releases the connection.
            with err:
                return json.loads(err.read().decode("utf-8") or "{}")
        except Exception:  # noqa: BLE001 - deliberate best effort
            return None

    def track(self, event: Dict[str, Any]) -> Dict[str, Any]:
        """Send one usage event and return the decoded JSON response.

        Keys whose value is ``None`` are dropped from the payload. HTTP 5xx,
        HTTP 429 and transport-level failures (``URLError``, timeouts,
        dropped connections) are retried up to ``max_retries`` times with
        exponential backoff; any other HTTP error is raised immediately.

        Raises:
            TollgateError: when no API key is configured, on a non-retryable
                HTTP status, or when a retryable HTTP status persists.
            OSError: the last transport error (e.g. ``URLError`` or a
                timeout) when every attempt failed at the transport level.
        """
        if not self.api_key:
            raise TollgateError("Missing API key — pass api_key or set TOLLGATE_API_KEY.")

        # Drop None values so server defaults apply.
        payload = json.dumps({k: v for k, v in event.items() if v is not None}).encode("utf-8")
        url = self.base_url + "/api/track"
        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + self.api_key,
        }

        last_err: Optional[Exception] = None
        for attempt in range(self.max_retries + 1):
            req = urllib.request.Request(url, data=payload, method="POST", headers=headers)
            try:
                with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                    # 200 (duplicate) and 201 (created) both succeed here.
                    return json.loads(resp.read().decode("utf-8") or "{}")
            except urllib.error.HTTPError as e:
                status = e.code
                failure = TollgateError(
                    "Tollgate track failed (%d)" % status, status, self._error_body(e)
                )
                if status < 500 and status != 429:
                    raise failure
                last_err = failure
            except OSError as e:
                # URLError is an OSError, but urlopen can also surface raw
                # socket timeouts and dropped connections while waiting for
                # or reading the response; those are just as transient, so
                # retry them too instead of failing on the first attempt.
                last_err = e

            if attempt < self.max_retries:
                time.sleep(0.2 * (2 ** attempt))  # 0.2s, 0.4s, …

        raise last_err or TollgateError("Tollgate track failed after retries")
74
+
75
+
76
def create_tollgate_client(
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    timeout: float = 10.0,
    max_retries: int = 2,
) -> TollgateClient:
    """Build a :class:`TollgateClient`, forwarding every argument unchanged."""
    options = {
        "api_key": api_key,
        "base_url": base_url,
        "timeout": timeout,
        "max_retries": max_retries,
    }
    return TollgateClient(**options)
@@ -0,0 +1,173 @@
1
+ """Auto-instrumentation: wrap a provider client so every completion reports its
2
+ REAL usage to Tollgate — no manual token counting. Structurally typed, so this
3
+ package never has to depend on the provider SDKs."""
4
+
5
+ import logging
6
+ import threading
7
+ import uuid
8
+ from typing import Any, Callable, Dict, Optional, Union
9
+
10
+ from .client import TollgateClient
11
+
12
+ logger = logging.getLogger("tollgate")
13
+
14
+ Revenue = Union[int, Callable[[Any], Optional[int]], None]
15
+ RunId = Union[str, Callable[[], str], None]
16
+
17
+
18
+ def _attr(obj: Any, *path: str, default: Any = None) -> Any:
19
+ """Read a nested attribute or dict key path, tolerant of either."""
20
+ cur = obj
21
+ for p in path:
22
+ if cur is None:
23
+ return default
24
+ cur = cur.get(p) if isinstance(cur, dict) else getattr(cur, p, None)
25
+ return cur if cur is not None else default
26
+
27
+
28
def _resolve_run_id(run_id: RunId, response_id: Optional[str]) -> str:
    """Pick the run id for an event.

    A callable ``run_id`` is invoked and its result returned as-is.
    Otherwise the first truthy value of ``run_id`` and ``response_id`` wins,
    and a fresh random UUID string is used when both are empty.
    """
    if not callable(run_id):
        for candidate in (run_id, response_id):
            if candidate:
                return candidate
        return str(uuid.uuid4())
    return run_id()
32
+
33
+
34
def _resolve_revenue(revenue: Revenue, response: Any) -> Optional[int]:
    """Return ``revenue`` itself, or ``revenue(response)`` when it is callable."""
    if callable(revenue):
        return revenue(response)
    return revenue
36
+
37
+
38
def _fire(tollgate: TollgateClient, event: Dict[str, Any], on_error: Optional[Callable[[Exception], None]]) -> None:
    """Send ``event`` via ``tollgate.track`` on a daemon thread and return at once.

    Any exception raised by ``track`` is handed to ``on_error``; when no
    handler is given it is logged as a warning instead.
    """

    def _log_failure(exc: Exception) -> None:
        logger.warning("[tollgate] track failed: %s", exc)

    def _send() -> None:
        try:
            tollgate.track(event)
        except Exception as exc:  # noqa: BLE001 - report, never raise into caller
            handler = on_error or _log_failure
            handler(exc)

    worker = threading.Thread(target=_send, daemon=True)
    worker.start()
46
+
47
+
48
+ # --- Anthropic ------------------------------------------------------------
49
+
50
def anthropic_event_from(
    msg: Any,
    customer_id: str,
    agent_id: Optional[str] = None,
    run_id: RunId = None,
    revenue_unit_cents: Revenue = None,
) -> Optional[Dict[str, Any]]:
    """Build a Tollgate event dict from an Anthropic message response.

    Returns ``None`` when ``msg`` carries no ``usage``. Token counts default
    to ``0`` and the model to ``"unknown"`` when absent. The idempotency key
    is the message ``id``, falling back to the resolved run id.
    """
    usage = _attr(msg, "usage")
    if usage is None:
        return None

    message_id = _attr(msg, "id")
    resolved_run = _resolve_run_id(run_id, message_id)

    event: Dict[str, Any] = {
        "customerId": customer_id,
        "agentId": agent_id,
        "runId": resolved_run,
        "provider": "anthropic",
        "model": _attr(msg, "model", default="unknown"),
    }
    # Event field -> attribute name on the provider's usage object.
    usage_fields = (
        ("tokensIn", "input_tokens"),
        ("tokensOut", "output_tokens"),
        ("cachedTokens", "cache_read_input_tokens"),
    )
    for field, source in usage_fields:
        event[field] = _attr(usage, source, default=0)
    event["revenueUnitCents"] = _resolve_revenue(revenue_unit_cents, msg)
    event["idempotencyKey"] = message_id or resolved_run
    return event
73
+
74
+
75
+ # --- OpenAI ---------------------------------------------------------------
76
+
77
def openai_event_from(
    completion: Any,
    customer_id: str,
    agent_id: Optional[str] = None,
    run_id: RunId = None,
    revenue_unit_cents: Revenue = None,
) -> Optional[Dict[str, Any]]:
    """Build a Tollgate event dict from an OpenAI chat completion response.

    Returns ``None`` when ``completion`` carries no ``usage``. Token counts
    default to ``0`` and the model to ``"unknown"`` when absent. The
    idempotency key is the completion ``id``, falling back to the resolved
    run id.
    """
    usage = _attr(completion, "usage")
    if usage is None:
        return None

    completion_id = _attr(completion, "id")
    resolved_run = _resolve_run_id(run_id, completion_id)

    event: Dict[str, Any] = {
        "customerId": customer_id,
        "agentId": agent_id,
        "runId": resolved_run,
        "provider": "openai",
        "model": _attr(completion, "model", default="unknown"),
    }
    # Event field -> attribute path on the provider's usage object.
    usage_fields = (
        ("tokensIn", ("prompt_tokens",)),
        ("tokensOut", ("completion_tokens",)),
        ("reasoningTokens", ("completion_tokens_details", "reasoning_tokens")),
        ("cachedTokens", ("prompt_tokens_details", "cached_tokens")),
    )
    for field, source_path in usage_fields:
        event[field] = _attr(usage, *source_path, default=0)
    event["revenueUnitCents"] = _resolve_revenue(revenue_unit_cents, completion)
    event["idempotencyKey"] = completion_id or resolved_run
    return event
101
+
102
+
103
+ # --- Proxy plumbing -------------------------------------------------------
104
+
105
+ class _CreateInterceptor:
106
+ """Wraps an object exposing ``create(...)`` and reports the response."""
107
+
108
+ def __init__(self, inner: Any, hook: Callable[[Any], None]):
109
+ self._inner = inner
110
+ self._hook = hook
111
+
112
+ def create(self, *args: Any, **kwargs: Any) -> Any:
113
+ result = self._inner.create(*args, **kwargs)
114
+ self._hook(result)
115
+ return result
116
+
117
+ def __getattr__(self, name: str) -> Any:
118
+ return getattr(self._inner, name)
119
+
120
+
121
+ class _Proxy:
122
+ """Forwards everything to ``target`` except the keys in ``overrides``."""
123
+
124
+ def __init__(self, target: Any, overrides: Dict[str, Any]):
125
+ object.__setattr__(self, "_target", target)
126
+ object.__setattr__(self, "_overrides", overrides)
127
+
128
+ def __getattr__(self, name: str) -> Any:
129
+ overrides = object.__getattribute__(self, "_overrides")
130
+ if name in overrides:
131
+ return overrides[name]
132
+ return getattr(object.__getattribute__(self, "_target"), name)
133
+
134
+
135
def wrap_anthropic(
    client: Any,
    tollgate: TollgateClient,
    customer_id: str,
    agent_id: Optional[str] = None,
    run_id: RunId = None,
    revenue_unit_cents: Revenue = None,
    on_error: Optional[Callable[[Exception], None]] = None,
) -> Any:
    """Wrap an Anthropic client so ``messages.create`` auto-reports real usage.

    Args:
        client: Object exposing ``messages.create(...)``.
        tollgate: Client used to send the usage events.
        customer_id: Value reported as ``customerId`` on every event.
        agent_id: Optional value reported as ``agentId``.
        run_id: Fixed run id, or a zero-argument callable producing one.
        revenue_unit_cents: Fixed revenue, or a callable of the response.
        on_error: Receives any exception raised while building or sending an
            event; when omitted the failure is logged as a warning.

    Returns:
        A proxy that behaves like ``client`` except that ``messages`` is
        replaced by an intercepting wrapper.
    """

    def hook(result: Any) -> None:
        # Building the event runs user callables (run_id, revenue_unit_cents)
        # on the caller's thread; a failure there must be reported, not
        # raised into a provider call that has already succeeded.
        try:
            event = anthropic_event_from(result, customer_id, agent_id, run_id, revenue_unit_cents)
        except Exception as err:  # noqa: BLE001 - report, never raise into caller
            (on_error or (lambda e: logger.warning("[tollgate] could not build usage event: %s", e)))(err)
            return
        if event:
            _fire(tollgate, event, on_error)

    messages = _CreateInterceptor(client.messages, hook)
    return _Proxy(client, {"messages": messages})
153
+
154
+
155
def wrap_openai(
    client: Any,
    tollgate: TollgateClient,
    customer_id: str,
    agent_id: Optional[str] = None,
    run_id: RunId = None,
    revenue_unit_cents: Revenue = None,
    on_error: Optional[Callable[[Exception], None]] = None,
) -> Any:
    """Wrap an OpenAI client so ``chat.completions.create`` auto-reports usage.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        tollgate: Client used to send the usage events.
        customer_id: Value reported as ``customerId`` on every event.
        agent_id: Optional value reported as ``agentId``.
        run_id: Fixed run id, or a zero-argument callable producing one.
        revenue_unit_cents: Fixed revenue, or a callable of the response.
        on_error: Receives any exception raised while building or sending an
            event; when omitted the failure is logged as a warning.

    Returns:
        A proxy that behaves like ``client`` except that
        ``chat.completions`` is replaced by an intercepting wrapper.
    """

    def hook(result: Any) -> None:
        # Building the event runs user callables (run_id, revenue_unit_cents)
        # on the caller's thread; a failure there must be reported, not
        # raised into a provider call that has already succeeded.
        try:
            event = openai_event_from(result, customer_id, agent_id, run_id, revenue_unit_cents)
        except Exception as err:  # noqa: BLE001 - report, never raise into caller
            (on_error or (lambda e: logger.warning("[tollgate] could not build usage event: %s", e)))(err)
            return
        if event:
            _fire(tollgate, event, on_error)

    completions = _CreateInterceptor(client.chat.completions, hook)
    chat = _Proxy(client.chat, {"completions": completions})
    return _Proxy(client, {"chat": chat})
173
+ return _Proxy(client, {"chat": chat})