spendguard 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spendguard/__init__.py +36 -0
- spendguard/config/__init__.py +0 -0
- spendguard/config/pricing_anthropic.json +7 -0
- spendguard/config/pricing_openai.json +7 -0
- spendguard/context.py +46 -0
- spendguard/cost/__init__.py +5 -0
- spendguard/cost/calculator.py +21 -0
- spendguard/cost/estimator.py +45 -0
- spendguard/cost/pricing.py +77 -0
- spendguard/events.py +112 -0
- spendguard/exceptions.py +31 -0
- spendguard/providers/__init__.py +5 -0
- spendguard/providers/anthropic_provider.py +13 -0
- spendguard/providers/base.py +31 -0
- spendguard/providers/openai_provider.py +13 -0
- spendguard/session.py +85 -0
- spendguard/tracker.py +131 -0
- spendguard/wrappers/__init__.py +4 -0
- spendguard/wrappers/_messages.py +15 -0
- spendguard/wrappers/anthropic.py +118 -0
- spendguard/wrappers/openai.py +147 -0
- spendguard-0.1.0.dist-info/METADATA +145 -0
- spendguard-0.1.0.dist-info/RECORD +24 -0
- spendguard-0.1.0.dist-info/WHEEL +4 -0
spendguard/__init__.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""spendguard -- blocks an over-budget LLM API call before it happens.
|
|
2
|
+
|
|
3
|
+
Build status: Stage 4 (core platform build), matching README.md's quickstart.
|
|
4
|
+
SpendGuard.wrap_openai() / wrap_anthropic() / track() are implemented and
|
|
5
|
+
gate client.chat.completions.create() / client.messages.create() respectively
|
|
6
|
+
-- every other client attribute (embeddings, models, ...) and streaming calls
|
|
7
|
+
(stream=True) are explicitly out of scope for this MVP wrapper, not silently
|
|
8
|
+
mishandled. Pricing data in config/ is placeholder, not verified current
|
|
9
|
+
rates -- see cost/pricing.py.
|
|
10
|
+
"""
|
|
11
|
+
from .exceptions import BudgetError, BudgetExceededError, PricingDataError
|
|
12
|
+
from .tracker import SpendTracker
|
|
13
|
+
from .cost import CostCalculator, CostEstimator, ModelPrice, PricingTable
|
|
14
|
+
from .providers import AnthropicProvider, OpenAIProvider, Provider, Usage
|
|
15
|
+
from .session import SpendGuard
|
|
16
|
+
from .wrappers import AnthropicClientWrapper, OpenAIClientWrapper
|
|
17
|
+
|
|
18
|
+
__version__ = "0.1.0"
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"SpendGuard",
|
|
22
|
+
"SpendTracker",
|
|
23
|
+
"BudgetError",
|
|
24
|
+
"BudgetExceededError",
|
|
25
|
+
"PricingDataError",
|
|
26
|
+
"CostCalculator",
|
|
27
|
+
"CostEstimator",
|
|
28
|
+
"ModelPrice",
|
|
29
|
+
"PricingTable",
|
|
30
|
+
"Provider",
|
|
31
|
+
"Usage",
|
|
32
|
+
"OpenAIProvider",
|
|
33
|
+
"AnthropicProvider",
|
|
34
|
+
"OpenAIClientWrapper",
|
|
35
|
+
"AnthropicClientWrapper",
|
|
36
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_note": "Pricing last verified 2026-06-25 against anthropic.com/pricing. Update _version_date and rates when Anthropic publishes a price change.",
|
|
3
|
+
"_version_date": "2026-06-25",
|
|
4
|
+
"claude-haiku-4-5": {"input_per_million": 1.00, "output_per_million": 5.00},
|
|
5
|
+
"claude-sonnet-4-6": {"input_per_million": 3.00, "output_per_million": 15.00},
|
|
6
|
+
"claude-opus-4-6": {"input_per_million": 15.00, "output_per_million": 75.00}
|
|
7
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_note": "Pricing last verified 2026-06-25 against openai.com/api/pricing. Update _version_date and rates when OpenAI publishes a price change.",
|
|
3
|
+
"_version_date": "2026-06-25",
|
|
4
|
+
"gpt-4o-mini": {"input_per_million": 0.15, "output_per_million": 0.60},
|
|
5
|
+
"gpt-4o": {"input_per_million": 2.50, "output_per_million": 10.00},
|
|
6
|
+
"gpt-4.1-mini": {"input_per_million": 0.40, "output_per_million": 1.60}
|
|
7
|
+
}
|
spendguard/context.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Thread-local override state shared between SpendGuard.track() and the
|
|
2
|
+
provider wrappers it gates.
|
|
3
|
+
|
|
4
|
+
`with guard.track(override=True):` has to affect only calls made on the
|
|
5
|
+
current thread inside that block, not every thread sharing the same
|
|
6
|
+
SpendGuard -- otherwise one thread's override would silently apply to
|
|
7
|
+
another's concurrent call.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import threading
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class OverrideContext:
    """Per-thread stack of override flags.

    State lives in a ``threading.local``, so a flag pushed on one thread is
    never visible from another thread using the same instance.
    """

    def __init__(self) -> None:
        self._local = threading.local()

    def push(self, override: bool) -> None:
        """Push *override* for the calling thread, creating its stack on first use."""
        try:
            frames = self._local.stack
        except AttributeError:
            # First push on this thread: the thread-local has no stack yet.
            frames = self._local.stack = []
        frames.append(override)

    def pop(self) -> None:
        """Discard the calling thread's most recently pushed flag."""
        self._local.stack.pop()

    def current(self) -> bool:
        """Return the calling thread's innermost flag, or False when none is active."""
        frames = getattr(self._local, "stack", None)
        if not frames:
            return False
        return frames[-1]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TrackContext:
    """Context manager returned by SpendGuard.track() -- see README.md's "Overriding a block on purpose".

    Entering pushes the configured override flag onto the shared
    OverrideContext; leaving pops it again.
    """

    def __init__(self, override_context: OverrideContext, override: bool) -> None:
        self._override = override
        self._override_context = override_context

    def __enter__(self) -> "TrackContext":
        flag = self._override
        self._override_context.push(flag)
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:
        # Pop unconditionally, then return False so an exception raised inside
        # the ``with`` body keeps propagating.
        self._override_context.pop()
        return False
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""CostCalculator -- turns real, post-call token usage into an actual dollar cost.
|
|
2
|
+
|
|
3
|
+
Always the source of truth recorded into SpendTracker.commit() -- never the
|
|
4
|
+
pre-call estimate, once the provider's real usage numbers are known.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from ..providers.base import Usage
|
|
9
|
+
from .pricing import PricingTable
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CostCalculator:
    """Prices a completed call from the token counts the provider reported."""

    def __init__(self, pricing: PricingTable) -> None:
        self._pricing = pricing

    def actual_cost_usd(self, provider: str, model: str, usage: Usage) -> float:
        """Return the dollar cost of *usage* at *model*'s per-million-token rates.

        Whatever ``PricingTable.get_price`` raises for this provider/model
        pair propagates unchanged.
        """
        rates = self._pricing.get_price(provider, model)
        input_usd = usage.input_tokens / 1_000_000 * rates.input_per_million
        output_usd = usage.output_tokens / 1_000_000 * rates.output_per_million
        return input_usd + output_usd
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""CostEstimator -- pre-call cost estimate from a prompt and max output size.
|
|
2
|
+
|
|
3
|
+
Zero required dependency: input tokens are approximated at ~4 characters per
|
|
4
|
+
token unless tiktoken is installed (pip install spendguard[tiktoken]), in
|
|
5
|
+
which case OpenAI prompts get exact cl100k_base counts. The estimate only has
|
|
6
|
+
to be close enough to gate correctly -- CostCalculator always recomputes the
|
|
7
|
+
real cost from the provider's own usage numbers after the call resolves.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from .pricing import PricingTable
|
|
12
|
+
|
|
13
|
+
CHARS_PER_TOKEN_APPROX = 4
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
import tiktoken
|
|
17
|
+
|
|
18
|
+
_ENCODING = tiktoken.get_encoding("cl100k_base")
|
|
19
|
+
except ImportError:
|
|
20
|
+
_ENCODING = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _count_input_tokens(prompt_text: str, provider: str) -> int:
|
|
24
|
+
if _ENCODING is not None and provider == "openai":
|
|
25
|
+
return max(1, len(_ENCODING.encode(prompt_text)))
|
|
26
|
+
return max(1, len(prompt_text) // CHARS_PER_TOKEN_APPROX)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CostEstimator:
    """Prices a call before it is made, from its prompt text and output-token cap."""

    def __init__(self, pricing: PricingTable) -> None:
        self._pricing = pricing

    def estimate_usd(
        self,
        provider: str,
        model: str,
        prompt_text: str,
        max_output_tokens: int,
    ) -> float:
        """Return the estimated dollar cost of one call.

        The input side is priced from ``_count_input_tokens(prompt_text,
        provider)``. The output side is priced as if all of
        *max_output_tokens* were generated. Whatever
        ``PricingTable.get_price`` raises propagates unchanged.
        """
        rates = self._pricing.get_price(provider, model)
        prompt_tokens = _count_input_tokens(prompt_text, provider)
        input_usd = prompt_tokens / 1_000_000 * rates.input_per_million
        output_usd = max_output_tokens / 1_000_000 * rates.output_per_million
        return input_usd + output_usd
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""PricingTable -- loads per-provider model pricing from config/pricing_<provider>.json.
|
|
2
|
+
|
|
3
|
+
Adding a new provider's prices later is a new config/pricing_<provider>.json
|
|
4
|
+
file, not a code change here. Keys starting with "_" (e.g. "_note") are
|
|
5
|
+
metadata, not models, and are skipped when loading.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import warnings
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from datetime import date, datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Dict, Optional
|
|
16
|
+
|
|
17
|
+
from ..exceptions import PricingDataError
|
|
18
|
+
|
|
19
|
+
_STALENESS_DAYS = 90
|
|
20
|
+
_log = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
DEFAULT_CONFIG_DIR = Path(__file__).resolve().parent.parent / "config"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class ModelPrice:
    """Immutable input/output price pair for one model."""

    # Price per one million input tokens, as read from the pricing config.
    input_per_million: float
    # Price per one million output tokens, as read from the pricing config.
    output_per_million: float
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PricingTable:
    """Loads and caches per-provider model prices from ``pricing_<provider>.json``.

    A provider's file is read on the first lookup for that provider and kept
    in an in-memory cache afterwards. Top-level keys starting with "_" are
    treated as metadata and never become models.
    """

    def __init__(self, config_dir: Optional[Path] = None) -> None:
        # Path() also accepts a plain string, so a str directory works with
        # the "/" join in _load_provider.
        self._config_dir = Path(config_dir) if config_dir is not None else DEFAULT_CONFIG_DIR
        self._cache: Dict[str, Dict[str, ModelPrice]] = {}

    def _load_provider(self, provider: str) -> Dict[str, ModelPrice]:
        """Return the model -> ModelPrice mapping for *provider*, loading it if needed.

        Emits a warning when the file's ``_version_date`` is more than
        ``_STALENESS_DAYS`` days old.

        Raises:
            PricingDataError: the config file is missing, unreadable, not
                valid JSON, not a JSON object, or has a malformed model entry.
        """
        if provider in self._cache:
            return self._cache[provider]

        path = self._config_dir / f"pricing_{provider}.json"
        if not path.exists():
            raise PricingDataError(f"no pricing config for provider '{provider}' (looked for {path})")

        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as err:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            raise PricingDataError(f"could not read pricing config {path}: {err}") from err
        if not isinstance(raw, dict):
            raise PricingDataError(f"pricing config {path} must be a JSON object keyed by model name")

        version_date_str = raw.get("_version_date")
        if version_date_str:
            try:
                version_date = datetime.strptime(version_date_str, "%Y-%m-%d").date()
            except (TypeError, ValueError):
                # An unparseable (or non-string) date only disables the
                # staleness warning; the prices themselves are still usable.
                _log.debug("Could not parse _version_date '%s' in pricing_%s.json", version_date_str, provider)
            else:
                age_days = (date.today() - version_date).days
                if age_days > _STALENESS_DAYS:
                    warnings.warn(
                        f"SpendGuard: {provider} pricing data is {age_days} days old "
                        f"(last verified {version_date_str}). Cost estimates may be inaccurate "
                        f"if {provider} has changed their prices. Update config/pricing_{provider}.json "
                        f"or pass a custom config_dir to PricingTable().",
                        stacklevel=3,
                    )

        try:
            prices = {
                model: ModelPrice(
                    input_per_million=entry["input_per_million"],
                    output_per_million=entry["output_per_million"],
                )
                for model, entry in raw.items()
                if not model.startswith("_")
            }
        except (KeyError, TypeError) as err:
            # A missing rate key or a non-object entry is a config problem;
            # report it through the package's own exception type.
            raise PricingDataError(f"malformed pricing entry in {path}: {err!r}") from err
        self._cache[provider] = prices
        return prices

    def get_price(self, provider: str, model: str) -> ModelPrice:
        """Return the price for *model* under *provider*.

        Raises:
            PricingDataError: the provider's config cannot be loaded, or it
                has no entry for *model*.
        """
        prices = self._load_provider(provider)
        try:
            return prices[model]
        except KeyError:
            raise PricingDataError(f"unknown model '{model}' for provider '{provider}'") from None
|
spendguard/events.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Local spend-event log -- zero-backend audit trail.
|
|
2
|
+
|
|
3
|
+
release-gates.md's Observability readiness gate requires "a way to tell,
|
|
4
|
+
after the fact, what happened during a failed run." For an SDK with no
|
|
5
|
+
backend and no account, that has to be a local file: one JSON line per
|
|
6
|
+
gated call, recording what was estimated, what was decided, what it took,
|
|
7
|
+
and what was actually spent. This is also the file a future V1 Ingestion API
|
|
8
|
+
would read from (prd.md's Dependency Map) -- spendguard doesn't need that
|
|
9
|
+
API to exist yet, it just needs to not lose the data that API will
|
|
10
|
+
eventually want.
|
|
11
|
+
|
|
12
|
+
Rotation: release-gates.md's cost-model gate requires "logging volume...
|
|
13
|
+
has an explicit limit or sampling rule" -- an unbounded append-only file
|
|
14
|
+
would otherwise grow forever for a high-call-volume user (the exact
|
|
15
|
+
runaway-agent scenario this product exists to catch). max_bytes bounds disk
|
|
16
|
+
usage to roughly 2x its value: one rotated file is kept (events.jsonl.1),
|
|
17
|
+
the previous rotation is discarded, never an unbounded history.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import json
|
|
22
|
+
import threading
|
|
23
|
+
from dataclasses import asdict, dataclass
|
|
24
|
+
from datetime import datetime, timezone
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Optional
|
|
27
|
+
|
|
28
|
+
DEFAULT_EVENT_LOG_PATH = Path(".spendguard") / "events.jsonl"
|
|
29
|
+
DEFAULT_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _now_iso() -> str:
|
|
33
|
+
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(frozen=True)
class SpendEvent:
    """One immutable record of a gated call, serialized as a single JSON line."""

    # Timestamp string; EventLog.record() fills this from _now_iso().
    at: str
    workspace: str
    provider: str
    model: str
    decision: str  # "allowed" | "allowed_with_override" | "blocked" | "rolled_back"
    estimated_cost_usd: float
    # The remaining fields are optional and default to None.
    actual_cost_usd: Optional[float] = None
    reservation_id: Optional[str] = None
    latency_ms: Optional[float] = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class EventLog:
    """Thread-safe append-only JSONL writer, with size-based rotation.

    Output only -- spendguard never reads this back to make a decision in the
    MVP. The lock here is separate from SpendTracker's: it protects the file
    write, not the budget arithmetic.
    """

    def __init__(self, path: Path, max_bytes: Optional[int] = DEFAULT_MAX_BYTES) -> None:
        """Store the target path and size limit; no file is touched here.

        Args:
            path: File that events are appended to. A plain string is
                accepted and converted to a Path.
            max_bytes: Size above which the file is rotated before a write;
                None disables rotation.
        """
        self._path = Path(path)
        self._max_bytes = max_bytes
        self._lock = threading.Lock()

    @property
    def path(self) -> Path:
        """The file that events are appended to."""
        return self._path

    def _rotate_if_needed(self, incoming_bytes: int) -> None:
        """Move the log to ``<name>.1`` if writing *incoming_bytes* would exceed max_bytes.

        Called from append() while the lock is held. A missing log file means
        there is nothing to rotate.
        """
        if self._max_bytes is None:
            return
        try:
            current_size = self._path.stat().st_size
        except FileNotFoundError:
            return
        if current_size + incoming_bytes <= self._max_bytes:
            return
        rotated = self._path.with_name(self._path.name + ".1")
        # Path.replace overwrites an existing target in a single step, so the
        # previous rotation is discarded without a separate unlink-then-rename
        # window in which neither file exists.
        self._path.replace(rotated)

    def append(self, event: SpendEvent) -> None:
        """Serialize *event* as one JSON line and append it to the log file.

        Creates the parent directory if needed and rotates first when the
        size limit would be exceeded. The whole sequence runs under the lock.
        """
        with self._lock:
            self._path.parent.mkdir(parents=True, exist_ok=True)
            line = json.dumps(asdict(event)) + "\n"
            # Rotation is decided on the encoded size, i.e. the bytes that
            # will actually be written.
            self._rotate_if_needed(len(line.encode("utf-8")))
            with self._path.open("a", encoding="utf-8") as f:
                f.write(line)

    def record(
        self,
        *,
        workspace: str,
        provider: str,
        model: str,
        decision: str,
        estimated_cost_usd: float,
        actual_cost_usd: Optional[float] = None,
        reservation_id: Optional[str] = None,
        latency_ms: Optional[float] = None,
    ) -> None:
        """Build a SpendEvent stamped with the current UTC time and append it."""
        self.append(
            SpendEvent(
                at=_now_iso(),
                workspace=workspace,
                provider=provider,
                model=model,
                decision=decision,
                estimated_cost_usd=estimated_cost_usd,
                actual_cost_usd=actual_cost_usd,
                reservation_id=reservation_id,
                latency_ms=latency_ms,
            )
        )
|
spendguard/exceptions.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Exceptions raised by spendguard's budget enforcement."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BudgetError(Exception):
    """Base class for all spendguard budget errors."""


class BudgetExceededError(BudgetError):
    """Raised when a call's estimated cost would cross the ceiling's threshold.

    Carries the numbers that caused the block so a caller (or its except clause)
    can report something more useful than the message string alone.

    Attributes set by the constructor: ``estimated_cost``, ``remaining`` and
    ``threshold_usd``.
    """

    def __init__(
        self,
        message: str,
        *,
        estimated_cost: float,
        remaining: float,
        threshold_usd: float,
    ) -> None:
        super().__init__(message)
        self.estimated_cost = estimated_cost
        self.remaining = remaining
        self.threshold_usd = threshold_usd

    @classmethod
    def _rebuild(
        cls,
        message: str,
        estimated_cost: float,
        remaining: float,
        threshold_usd: float,
    ) -> "BudgetExceededError":
        """Positional-argument constructor used by __reduce__."""
        return cls(
            message,
            estimated_cost=estimated_cost,
            remaining=remaining,
            threshold_usd=threshold_usd,
        )

    def __reduce__(self):
        # The default exception reduction rebuilds with cls(*self.args), which
        # fails here because the three numbers are required keyword-only
        # arguments. Route pickle/copy through _rebuild so they are passed.
        message = self.args[0] if self.args else ""
        return (
            self._rebuild,
            (message, self.estimated_cost, self.remaining, self.threshold_usd),
        )


class PricingDataError(BudgetError):
    """Raised when a model's pricing can't be resolved (unknown model, bad config)."""
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from .base import Provider, Usage
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AnthropicProvider(Provider):
    """Reads token usage from a response's ``usage.input_tokens`` / ``usage.output_tokens``."""

    name = "anthropic"

    def extract_usage(self, response: Any) -> Usage:
        """Return the token counts found on ``response.usage``."""
        reported = response.usage
        return Usage(
            input_tokens=reported.input_tokens,
            output_tokens=reported.output_tokens,
        )
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Provider abstraction -- the seam a new LLM provider plugs into.
|
|
2
|
+
|
|
3
|
+
Adding a provider beyond OpenAI/Anthropic (Gemini, Perplexity, etc. -- out of
|
|
4
|
+
scope today per product-thesis.md's non-goals, but the reason this seam
|
|
5
|
+
exists) means implementing extract_usage() below plus a pricing config file.
|
|
6
|
+
SpendTracker, CostEstimator, and CostCalculator never reference a provider's
|
|
7
|
+
SDK or response shape directly -- they only see Usage and dollar amounts.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from abc import ABC, abstractmethod
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class Usage:
    """Token counts for one completed call, independent of any provider's response shape."""

    # Tokens the provider reported for the request side of the call.
    input_tokens: int
    # Tokens the provider reported for the generated side of the call.
    output_tokens: int
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Provider(ABC):
    """Abstract adapter that turns one provider SDK's response object into a Usage."""

    # Provider identifier; concrete subclasses set it as a class attribute.
    name: str

    @abstractmethod
    def extract_usage(self, response: Any) -> Usage:
        """Return the token counts carried by a completed API response."""
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from .base import Provider, Usage
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class OpenAIProvider(Provider):
    """Reads token usage from a response's ``usage.prompt_tokens`` / ``usage.completion_tokens``."""

    name = "openai"

    def extract_usage(self, response: Any) -> Usage:
        """Return the token counts found on ``response.usage``."""
        reported = response.usage
        return Usage(
            input_tokens=reported.prompt_tokens,
            output_tokens=reported.completion_tokens,
        )
|
spendguard/session.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""SpendGuard -- the public entry point described in README.md's quickstart.
|
|
2
|
+
|
|
3
|
+
wrap_openai() / wrap_anthropic() are the primary integration pattern: wrap
|
|
4
|
+
the client once at construction, then call it exactly like the real client.
|
|
5
|
+
track() is the escape hatch for README.md's "Overriding a block on purpose".
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Optional
|
|
11
|
+
|
|
12
|
+
from .context import OverrideContext, TrackContext
|
|
13
|
+
from .cost.calculator import CostCalculator
|
|
14
|
+
from .cost.estimator import CostEstimator
|
|
15
|
+
from .cost.pricing import PricingTable
|
|
16
|
+
from .events import DEFAULT_EVENT_LOG_PATH, DEFAULT_MAX_BYTES, EventLog
|
|
17
|
+
from .tracker import DEFAULT_THRESHOLD_PCT, SpendTracker
|
|
18
|
+
from .wrappers.anthropic import AnthropicClientWrapper
|
|
19
|
+
from .wrappers.openai import OpenAIClientWrapper
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SpendGuard:
    """Public entry point: one tracker, one pricing table and one event log per workspace.

    Every wrapper created from the same SpendGuard shares its tracker,
    estimator, calculator, override context and event log.
    """

    def __init__(
        self,
        workspace: str,
        ceiling_usd: float,
        threshold_pct: float = DEFAULT_THRESHOLD_PCT,
        pricing_config_dir: Optional[Path] = None,
        event_log_path: Optional[Path] = None,
        event_log_max_bytes: Optional[int] = DEFAULT_MAX_BYTES,
        log_events: bool = True,
    ) -> None:
        self.workspace = workspace
        self._tracker = SpendTracker(ceiling_usd, threshold_pct)
        # The estimator and the calculator price from the same table.
        pricing = PricingTable(config_dir=pricing_config_dir)
        self._estimator = CostEstimator(pricing)
        self._calculator = CostCalculator(pricing)
        self._override_context = OverrideContext()
        if log_events:
            log_path = DEFAULT_EVENT_LOG_PATH if event_log_path is None else event_log_path
            self._event_log = EventLog(log_path, max_bytes=event_log_max_bytes)
        else:
            self._event_log = None

    def _wrapper_args(self) -> tuple:
        """Collaborators handed to every client wrapper, in constructor order."""
        return (
            self._tracker,
            self._estimator,
            self._calculator,
            self._override_context,
            self.workspace,
            self._event_log,
        )

    def wrap_openai(self, client: Any) -> OpenAIClientWrapper:
        """Return *client* wrapped in an OpenAIClientWrapper bound to this guard."""
        return OpenAIClientWrapper(client, *self._wrapper_args())

    def wrap_anthropic(self, client: Any) -> AnthropicClientWrapper:
        """Return *client* wrapped in an AnthropicClientWrapper bound to this guard."""
        return AnthropicClientWrapper(client, *self._wrapper_args())

    def track(self, model: Optional[str] = None, override: bool = False) -> TrackContext:
        """Escape hatch for one call -- see README.md's "Overriding a block on purpose".

        `model` is accepted for forward-compatible per-call labeling (planned
        for the V1.1 event log) but has no effect yet; `override` is the only
        thing this currently does.
        """
        return TrackContext(self._override_context, override)

    def get_summary(self) -> dict:
        """Return the tracker's current summary dict."""
        return self._tracker.get_summary()

    @property
    def event_log_path(self) -> Optional[Path]:
        """Where local spend events are being written, or None if logging is disabled."""
        if self._event_log is None:
            return None
        return self._event_log.path
|
spendguard/tracker.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""SpendTracker — thread-safe ceiling enforcement with atomic reservation.
|
|
2
|
+
|
|
3
|
+
Per README.md's "How the ceiling actually works" and prd.md's matching workflow,
|
|
4
|
+
spendguard mirrors runtime/cost_ledger.py's hard rule: a call is blocked once
|
|
5
|
+
cumulative spend plus its estimate would cross 25% of the founder-set ceiling.
|
|
6
|
+
|
|
7
|
+
cost_ledger.py checks that rule against spend already recorded, which is safe for
|
|
8
|
+
a single sequential caller but not for concurrent ones -- two calls firing at the
|
|
9
|
+
same instant can each see room under the threshold and both proceed, together
|
|
10
|
+
overshooting it. That gap is exactly the failure mode this product's own pitch
|
|
11
|
+
describes (a fan-out or retry loop spending faster than a check-then-record
|
|
12
|
+
ledger can track) and is why the PRD now requires this tracker to make the
|
|
13
|
+
check-and-reserve step atomic: estimated cost is reserved against the threshold
|
|
14
|
+
under a single lock *before* a call is allowed to proceed, then released and
|
|
15
|
+
converted to real spend (commit) or discarded (rollback) once the call resolves.
|
|
16
|
+
At every point in time, spent + reserved <= threshold.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import threading
|
|
21
|
+
import uuid
|
|
22
|
+
from typing import Dict
|
|
23
|
+
|
|
24
|
+
from .exceptions import BudgetExceededError
|
|
25
|
+
|
|
26
|
+
DEFAULT_THRESHOLD_PCT = 0.25


class SpendTracker:
    """Tracks spend and in-flight reservations against a ceiling's threshold.

    The threshold is ``ceiling_usd * threshold_pct`` (25% of the ceiling by
    default). Running totals are only read or changed while holding one lock.
    """

    def __init__(self, ceiling_usd: float, threshold_pct: float = DEFAULT_THRESHOLD_PCT) -> None:
        """Validate the limits and start with nothing spent or reserved.

        Raises:
            ValueError: *ceiling_usd* is negative or NaN, or *threshold_pct*
                is not in the range (0, 1].
        """
        # "not x >= 0" (rather than "x < 0") also rejects NaN. A NaN ceiling
        # would make every later threshold comparison False, so no call would
        # ever be blocked.
        if not ceiling_usd >= 0:
            raise ValueError("ceiling_usd cannot be negative")
        if not 0 < threshold_pct <= 1:
            raise ValueError("threshold_pct must be between 0 and 1")

        self._ceiling = float(ceiling_usd)
        self._threshold_pct = threshold_pct
        self._threshold = self._ceiling * threshold_pct
        self._spent = 0.0
        self._reserved = 0.0
        self._reservations: Dict[str, float] = {}
        self._lock = threading.Lock()

    def _release(self, reserved_amount: float) -> None:
        """Subtract a released reservation from the reserved total.

        Must be called with the lock held, after the reservation has been
        removed from ``_reservations``.
        """
        self._reserved -= reserved_amount
        if not self._reservations:
            # Nothing is in flight, so the true total is exactly zero. Reset
            # it to drop floating-point residue left by repeated += / -=,
            # which would otherwise keep shrinking `remaining`.
            self._reserved = 0.0

    def check_and_reserve(self, estimated_cost: float, override: bool = False) -> str:
        """Atomically check the threshold and reserve funds for a call.

        Raises BudgetExceededError if the estimate would cross the threshold,
        unless override=True -- an explicit override still reserves (and will
        still show up as spend once committed), it just skips the block.

        Raises ValueError if *estimated_cost* is negative or NaN: a negative
        reservation would add room under the threshold, and NaN would disable
        every later check.

        Returns a reservation ID to pass to commit() or rollback().
        """
        if not estimated_cost >= 0:
            raise ValueError(f"estimated_cost must be a non-negative number, got {estimated_cost!r}")
        with self._lock:
            remaining = self._threshold - self._spent - self._reserved
            if estimated_cost > remaining and not override:
                raise BudgetExceededError(
                    f"Estimated cost ${estimated_cost:.6f} would exceed the remaining "
                    f"${remaining:.6f} under the ${self._threshold:.2f} threshold "
                    f"({self._threshold_pct:.0%} of ${self._ceiling:.2f} ceiling)",
                    estimated_cost=estimated_cost,
                    remaining=remaining,
                    threshold_usd=self._threshold,
                )

            reservation_id = str(uuid.uuid4())
            self._reserved += estimated_cost
            self._reservations[reservation_id] = estimated_cost
            return reservation_id

    def commit(self, reservation_id: str, actual_cost: float) -> None:
        """Release a reservation and record the real cost once a call succeeds.

        Raises:
            KeyError: *reservation_id* is unknown or was already released.
        """
        with self._lock:
            try:
                reserved_amount = self._reservations.pop(reservation_id)
            except KeyError:
                raise KeyError(f"no such reservation: {reservation_id}") from None
            self._release(reserved_amount)
            self._spent += actual_cost

    def rollback(self, reservation_id: str) -> None:
        """Release a reservation with no spend recorded, e.g. after a failed call.

        Idempotent -- rolling back a reservation that's already gone is a no-op,
        not an error, since a caller's cleanup path may run more than once.
        """
        with self._lock:
            reserved_amount = self._reservations.pop(reservation_id, None)
            if reserved_amount is not None:
                self._release(reserved_amount)

    def get_ceiling_usd(self) -> float:
        """Return the ceiling passed to the constructor, as a float."""
        return self._ceiling

    def get_threshold_usd(self) -> float:
        """Return the blocking threshold (ceiling * threshold_pct)."""
        return self._threshold

    def get_spent(self) -> float:
        """Return the total committed spend."""
        with self._lock:
            return self._spent

    def get_reserved(self) -> float:
        """Return the total currently held by in-flight reservations."""
        with self._lock:
            return self._reserved

    def get_remaining(self) -> float:
        """Room left under the threshold, accounting for in-flight reservations."""
        with self._lock:
            return self._threshold - self._spent - self._reserved

    def get_summary(self) -> dict:
        """Return a snapshot dict of the limits, totals and utilization percentage."""
        with self._lock:
            spent, reserved = self._spent, self._reserved
        # A zero threshold would divide by zero; report 0% utilization instead.
        utilization = (spent + reserved) / self._threshold * 100 if self._threshold > 0 else 0.0
        return {
            "ceiling_usd": self._ceiling,
            "threshold_pct": self._threshold_pct,
            "threshold_usd": self._threshold,
            "spent": spent,
            "reserved": reserved,
            "remaining": self._threshold - spent - reserved,
            "utilization_percent": utilization,
        }

    def reset(self) -> None:
        """Reset spent and reservations to zero. Does not cancel in-flight calls --
        only call this when certain nothing is pending."""
        with self._lock:
            self._spent = 0.0
            self._reserved = 0.0
            self._reservations.clear()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Shared helper: flatten a chat-style messages list into plain text for the
|
|
2
|
+
pre-call token estimate. OpenAI's and Anthropic's message dicts use the same
|
|
3
|
+
{"role": ..., "content": "..."} shape for simple text content.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Iterable
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def flatten_messages(messages: Iterable[dict]) -> str:
    """Join every message's ``content`` into one newline-separated string.

    A missing ``content`` key contributes an empty string. Content that is
    not a str is converted with ``str()``.
    """

    def _as_text(message: dict) -> str:
        body = message.get("content", "")
        if isinstance(body, str):
            return body
        return str(body)

    return "\n".join(_as_text(message) for message in messages)
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""AnthropicClientWrapper -- drop-in wrap of an anthropic.Anthropic() client.
|
|
2
|
+
|
|
3
|
+
Gates client.messages.create() only; everything else delegates straight
|
|
4
|
+
through, ungated -- same MVP scope boundary as OpenAIClientWrapper. Streaming
|
|
5
|
+
is rejected outright for the same reason: usage can't be reliably read off a
|
|
6
|
+
streamed response in this version.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import time
|
|
11
|
+
from typing import Any, List, Optional
|
|
12
|
+
|
|
13
|
+
from ..context import OverrideContext
|
|
14
|
+
from ..cost.calculator import CostCalculator
|
|
15
|
+
from ..cost.estimator import CostEstimator
|
|
16
|
+
from ..events import EventLog
|
|
17
|
+
from ..exceptions import BudgetExceededError
|
|
18
|
+
from ..providers.anthropic_provider import AnthropicProvider
|
|
19
|
+
from ..tracker import SpendTracker
|
|
20
|
+
from ._messages import flatten_messages
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class _GatedMessages:
    """Budget-gated replacement for the ``messages`` resource of an Anthropic client.

    ``create()`` estimates the call's cost, reserves it with the tracker, runs
    the real call, then commits the actual cost -- or rolls the reservation
    back if the real call raised. Each outcome is recorded in the event log
    when one is configured.
    """

    def __init__(
        self,
        real_messages: Any,
        tracker: SpendTracker,
        estimator: CostEstimator,
        calculator: CostCalculator,
        override_context: OverrideContext,
        workspace: str,
        event_log: Optional[EventLog],
    ) -> None:
        self._real = real_messages
        self._tracker = tracker
        self._estimator = estimator
        self._calculator = calculator
        self._override_context = override_context
        self._workspace = workspace
        self._event_log = event_log
        self._provider = AnthropicProvider()

    def _log(
        self, model: str, decision: str, estimated_cost: float, actual_cost=None, reservation_id=None, latency_ms=None
    ) -> None:
        """Record one gate decision in the event log; a no-op when no log is set."""
        if self._event_log is not None:
            self._event_log.record(
                workspace=self._workspace,
                provider="anthropic",
                model=model,
                decision=decision,
                estimated_cost_usd=estimated_cost,
                actual_cost_usd=actual_cost,
                reservation_id=reservation_id,
                latency_ms=latency_ms,
            )

    def create(self, *, model: str, max_tokens: int, messages: List[dict], **kwargs: Any) -> Any:
        """Run ``messages.create()`` on the real client behind the budget gate.

        Args:
            model: Model name, used for pricing and forwarded to the real call.
            max_tokens: Output-token cap; used as the output size of the estimate.
            messages: Conversation messages, forwarded unchanged.
            **kwargs: Any other arguments, forwarded unchanged. A ``system``
                prompt found here is included in the cost estimate.

        Returns:
            Whatever the real ``create()`` returned.

        Raises:
            NotImplementedError: if ``stream`` is truthy.
            BudgetExceededError: re-raised from the tracker when the reservation
                is refused; the real call is not made in that case.
        """
        if kwargs.get("stream"):
            raise NotImplementedError(
                "spendguard does not support stream=True yet -- usage can't be "
                "reliably extracted from a streamed response in this version."
            )

        # Anthropic takes the system prompt as a top-level argument rather than
        # as a message, and bills it as input. Leaving it out of the estimate
        # would let a call with a large system prompt slip under the gate.
        system = kwargs.get("system")
        messages_text = flatten_messages(messages)
        if system:
            system_text = flatten_messages([{"role": "system", "content": system}])
            prompt_text = f"{system_text}\n{messages_text}"
        else:
            prompt_text = messages_text
        estimated_cost = self._estimator.estimate_usd("anthropic", model, prompt_text, max_tokens)

        override = self._override_context.current()
        try:
            reservation_id = self._tracker.check_and_reserve(estimated_cost, override=override)
        except BudgetExceededError:
            self._log(model, "blocked", estimated_cost)
            raise

        started_at = time.monotonic()
        try:
            response = self._real.create(model=model, max_tokens=max_tokens, messages=messages, **kwargs)
        except Exception:
            # The call failed, so release the reserved amount before propagating.
            latency_ms = (time.monotonic() - started_at) * 1000
            self._tracker.rollback(reservation_id)
            self._log(model, "rolled_back", estimated_cost, reservation_id=reservation_id, latency_ms=latency_ms)
            raise
        latency_ms = (time.monotonic() - started_at) * 1000

        # Replace the estimate with the cost computed from the reported usage.
        usage = self._provider.extract_usage(response)
        actual_cost = self._calculator.actual_cost_usd("anthropic", model, usage)
        self._tracker.commit(reservation_id, actual_cost)
        self._log(
            model,
            "allowed_with_override" if override else "allowed",
            estimated_cost,
            actual_cost,
            reservation_id,
            latency_ms,
        )
        return response
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class AnthropicClientWrapper:
    """Wraps a sync anthropic.Anthropic() client. Async clients are not wrapped yet.

    ``messages`` is replaced by a budget-gated proxy; every other attribute is
    looked up on the wrapped client unchanged.
    """

    def __init__(
        self,
        client: Any,
        tracker: SpendTracker,
        estimator: CostEstimator,
        calculator: CostCalculator,
        override_context: OverrideContext,
        workspace: str = "default",
        event_log: Optional[EventLog] = None,
    ) -> None:
        self._client = client
        self.messages = _GatedMessages(
            client.messages, tracker, estimator, calculator, override_context, workspace, event_log
        )

    def __getattr__(self, name: str) -> Any:
        """Delegate attributes not found on the wrapper to the real client.

        Raises:
            AttributeError: if the wrapper has no client yet, or the client
                lacks the attribute.
        """
        # __getattr__ only runs after normal lookup failed. When ``_client``
        # itself is missing (copy/pickle build the instance with __new__ and
        # probe it before restoring state), evaluating ``self._client`` would
        # re-enter this method without end. Read it from the instance dict and
        # fail with a plain AttributeError instead.
        try:
            client = self.__dict__["_client"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            ) from None
        return getattr(client, name)
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""OpenAIClientWrapper -- drop-in wrap of an openai.OpenAI() client.
|
|
2
|
+
|
|
3
|
+
Gates client.chat.completions.create() only; every other attribute on the
|
|
4
|
+
client (embeddings, models, files, ...) delegates straight through, ungated.
|
|
5
|
+
That's a deliberate MVP scope boundary, not an oversight -- see README.md's
|
|
6
|
+
"What this does NOT do (yet)". Streaming (stream=True) is rejected outright
|
|
7
|
+
rather than silently mis-tracked, since usage can't be reliably read off a
|
|
8
|
+
streamed response in this version.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import time
|
|
13
|
+
from typing import Any, List, Optional
|
|
14
|
+
|
|
15
|
+
from ..context import OverrideContext
|
|
16
|
+
from ..cost.calculator import CostCalculator
|
|
17
|
+
from ..cost.estimator import CostEstimator
|
|
18
|
+
from ..events import EventLog
|
|
19
|
+
from ..providers.openai_provider import OpenAIProvider
|
|
20
|
+
from ..tracker import SpendTracker
|
|
21
|
+
from ..exceptions import BudgetExceededError
|
|
22
|
+
from ._messages import flatten_messages
|
|
23
|
+
|
|
24
|
+
# Output-token count assumed for the pre-call cost estimate when the caller
# does not pass max_tokens.
DEFAULT_MAX_OUTPUT_TOKENS_ESTIMATE = 1024
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class _GatedChatCompletions:
    """Budget-gated replacement for ``client.chat.completions`` on an OpenAI client.

    ``create()`` estimates the call's cost, reserves it with the tracker, runs
    the real call, then commits the actual cost -- or rolls the reservation
    back if the real call raised. Each outcome is recorded in the event log
    when one is configured.
    """

    def __init__(
        self,
        real_completions: Any,
        tracker: SpendTracker,
        estimator: CostEstimator,
        calculator: CostCalculator,
        override_context: OverrideContext,
        workspace: str,
        event_log: Optional[EventLog],
    ) -> None:
        self._real = real_completions
        self._tracker = tracker
        self._estimator = estimator
        self._calculator = calculator
        self._override_context = override_context
        self._workspace = workspace
        self._event_log = event_log
        self._provider = OpenAIProvider()

    def _log(
        self, model: str, decision: str, estimated_cost: float, actual_cost=None, reservation_id=None, latency_ms=None
    ) -> None:
        """Record one gate decision in the event log; a no-op when no log is set."""
        if self._event_log is not None:
            self._event_log.record(
                workspace=self._workspace,
                provider="openai",
                model=model,
                decision=decision,
                estimated_cost_usd=estimated_cost,
                actual_cost_usd=actual_cost,
                reservation_id=reservation_id,
                latency_ms=latency_ms,
            )

    def create(
        self, *, model: str, messages: List[dict], max_tokens: Optional[int] = None, **kwargs: Any
    ) -> Any:
        """Run ``chat.completions.create()`` on the real client behind the budget gate.

        Args:
            model: Model name, used for pricing and forwarded to the real call.
            messages: Conversation messages, forwarded unchanged.
            max_tokens: Optional output-token cap. Forwarded only when given.
            **kwargs: Any other arguments, forwarded unchanged. An integer
                ``max_completion_tokens`` found here sizes the estimate when
                ``max_tokens`` is not given.

        Returns:
            Whatever the real ``create()`` returned.

        Raises:
            NotImplementedError: if ``stream`` is truthy.
            BudgetExceededError: re-raised from the tracker when the reservation
                is refused; the real call is not made in that case.
        """
        if kwargs.get("stream"):
            raise NotImplementedError(
                "spendguard does not support stream=True yet -- usage can't be "
                "reliably extracted from a streamed response in this version."
            )

        prompt_text = flatten_messages(messages)
        if max_tokens is not None:
            estimate_output_tokens = max_tokens
        else:
            # Newer OpenAI clients cap output with max_completion_tokens instead
            # of max_tokens; honour it so a large cap is not estimated at the
            # small default.
            completion_cap = kwargs.get("max_completion_tokens")
            if isinstance(completion_cap, int):
                estimate_output_tokens = completion_cap
            else:
                estimate_output_tokens = DEFAULT_MAX_OUTPUT_TOKENS_ESTIMATE
        estimated_cost = self._estimator.estimate_usd("openai", model, prompt_text, estimate_output_tokens)

        override = self._override_context.current()
        try:
            reservation_id = self._tracker.check_and_reserve(estimated_cost, override=override)
        except BudgetExceededError:
            self._log(model, "blocked", estimated_cost)
            raise

        # Only forward max_tokens when the caller actually supplied it.
        call_kwargs = dict(kwargs)
        if max_tokens is not None:
            call_kwargs["max_tokens"] = max_tokens

        started_at = time.monotonic()
        try:
            response = self._real.create(model=model, messages=messages, **call_kwargs)
        except Exception:
            # The call failed, so release the reserved amount before propagating.
            latency_ms = (time.monotonic() - started_at) * 1000
            self._tracker.rollback(reservation_id)
            self._log(model, "rolled_back", estimated_cost, reservation_id=reservation_id, latency_ms=latency_ms)
            raise
        latency_ms = (time.monotonic() - started_at) * 1000

        # Replace the estimate with the cost computed from the reported usage.
        usage = self._provider.extract_usage(response)
        actual_cost = self._calculator.actual_cost_usd("openai", model, usage)
        self._tracker.commit(reservation_id, actual_cost)
        self._log(
            model,
            "allowed_with_override" if override else "allowed",
            estimated_cost,
            actual_cost,
            reservation_id,
            latency_ms,
        )
        return response
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class _GatedChat:
    """Stand-in for ``client.chat`` that exposes a gated ``completions`` attribute."""

    def __init__(
        self,
        real_chat: Any,
        tracker: SpendTracker,
        estimator: CostEstimator,
        calculator: CostCalculator,
        override_context: OverrideContext,
        workspace: str,
        event_log: Optional[EventLog],
    ) -> None:
        # Everything except the real resource is handed on untouched, in the
        # order _GatedChatCompletions expects.
        gate_dependencies = (tracker, estimator, calculator, override_context, workspace, event_log)
        gated_completions = _GatedChatCompletions(real_chat.completions, *gate_dependencies)
        self.completions = gated_completions
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class OpenAIClientWrapper:
    """Wraps a sync openai.OpenAI() client. Async clients are not wrapped yet.

    ``chat`` is replaced by a proxy whose ``completions.create()`` is budget
    gated; every other attribute is looked up on the wrapped client unchanged.
    """

    def __init__(
        self,
        client: Any,
        tracker: SpendTracker,
        estimator: CostEstimator,
        calculator: CostCalculator,
        override_context: OverrideContext,
        workspace: str = "default",
        event_log: Optional[EventLog] = None,
    ) -> None:
        self._client = client
        self.chat = _GatedChat(
            client.chat, tracker, estimator, calculator, override_context, workspace, event_log
        )

    def __getattr__(self, name: str) -> Any:
        """Delegate attributes not found on the wrapper to the real client.

        Raises:
            AttributeError: if the wrapper has no client yet, or the client
                lacks the attribute.
        """
        # __getattr__ only runs after normal lookup failed. When ``_client``
        # itself is missing (copy/pickle build the instance with __new__ and
        # probe it before restoring state), evaluating ``self._client`` would
        # re-enter this method without end. Read it from the instance dict and
        # fail with a plain AttributeError instead.
        try:
            client = self.__dict__["_client"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            ) from None
        return getattr(client, name)
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spendguard
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call before it happens.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Rahul-git23/spendguard
|
|
6
|
+
Project-URL: Repository, https://github.com/Rahul-git23/spendguard
|
|
7
|
+
Author-email: Rahul Vichare <rahulvichare@gmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: ai,anthropic,budget,cost,guardrail,llm,openai
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Requires-Dist: anthropic>=0.25.0
|
|
21
|
+
Requires-Dist: openai>=1.0.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
24
|
+
Provides-Extra: tiktoken
|
|
25
|
+
Requires-Dist: tiktoken>=0.5.0; extra == 'tiktoken'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# SpendGuard
|
|
29
|
+
|
|
30
|
+
A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call **before it happens** — no surprises at the end of the month.
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from spendguard import SpendGuard
|
|
34
|
+
|
|
35
|
+
guard = SpendGuard(workspace="my-app", ceiling_usd=20.0)
|
|
36
|
+
client = guard.wrap_openai(OpenAI()) # or wrap_anthropic(Anthropic())
|
|
37
|
+
|
|
38
|
+
# Call the client exactly as normal — SpendGuard intercepts transparently.
|
|
39
|
+
# If the estimated cost would push cumulative spend past 25% of the $20 ceiling,
|
|
40
|
+
# it raises BudgetExceededError before the API call is made.
|
|
41
|
+
response = client.chat.completions.create(model="gpt-4o", messages=[...])
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Install
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install spendguard
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
For more accurate pre-call token counting on OpenAI models:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install spendguard[tiktoken]
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## How it works
|
|
57
|
+
|
|
58
|
+
SpendGuard wraps your existing client object. Every call goes through two steps:
|
|
59
|
+
|
|
60
|
+
1. **Pre-call estimate** — approximates the input token count and adds the max output tokens × the model's per-token rate. If `cumulative_spend + estimate > ceiling × threshold_pct`, it raises `BudgetExceededError` before the network call.
|
|
61
|
+
2. **Post-call commit** — reads the provider's actual usage numbers from the response and records the real cost.
|
|
62
|
+
|
|
63
|
+
The default threshold is 25% of the ceiling (`threshold_pct=0.25`). This means calls start being blocked once cumulative spend plus the next call's estimate would exceed 25% of your ceiling — it is an early guardrail against runaway spend, not a hard cap at 100%.
|
|
64
|
+
|
|
65
|
+
## Supported providers and models
|
|
66
|
+
|
|
67
|
+
| Provider | Client wrapper | Models gated by default |
|
|
68
|
+
| ---------- | -------------------- | ----------------------- |
|
|
69
|
+
| OpenAI | `wrap_openai()` | gpt-4o, gpt-4o-mini, and all models in the pricing config |
|
|
70
|
+
| Anthropic | `wrap_anthropic()` | claude-3-5-sonnet, claude-3-opus, haiku, and all models in the pricing config |
|
|
71
|
+
|
|
72
|
+
## Usage
|
|
73
|
+
|
|
74
|
+
### Basic setup
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from openai import OpenAI
|
|
78
|
+
from spendguard import SpendGuard
from spendguard.exceptions import BudgetExceededError
|
|
79
|
+
|
|
80
|
+
guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
|
|
81
|
+
client = guard.wrap_openai(OpenAI())
|
|
82
|
+
|
|
83
|
+
try:
|
|
84
|
+
response = client.chat.completions.create(
|
|
85
|
+
model="gpt-4o",
|
|
86
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
87
|
+
max_tokens=512,
|
|
88
|
+
)
|
|
89
|
+
except BudgetExceededError as e:
|
|
90
|
+
print(f"Blocked: {e}")
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Anthropic
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from anthropic import Anthropic
|
|
97
|
+
from spendguard import SpendGuard
|
|
98
|
+
|
|
99
|
+
guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
|
|
100
|
+
client = guard.wrap_anthropic(Anthropic())
|
|
101
|
+
|
|
102
|
+
response = client.messages.create(
|
|
103
|
+
model="claude-sonnet-4-6",
|
|
104
|
+
max_tokens=1024,
|
|
105
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
106
|
+
)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Overriding a block on purpose
|
|
110
|
+
|
|
111
|
+
When you explicitly want to allow a call that would be blocked (e.g., a one-time large batch job), use `track()` with `override=True`:
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
with guard.track(override=True):
|
|
115
|
+
response = client.chat.completions.create(...) # never blocked
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
The override only applies inside the `with` block and does not persist.
|
|
119
|
+
|
|
120
|
+
### Inspecting current spend
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
summary = guard.get_summary()
|
|
124
|
+
# {"ceiling_usd": 20.0, "spent_usd": 1.23, "reserved_usd": 0.0, "threshold_pct": 0.25}
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Workspace isolation
|
|
128
|
+
|
|
129
|
+
Each `SpendGuard` instance is scoped to a `workspace` string. When you run multiple products or feature flags, give each its own workspace so their budgets are tracked independently.
|
|
130
|
+
|
|
131
|
+
## Out of scope for v0.1
|
|
132
|
+
|
|
133
|
+
- Streaming calls (`stream=True`) — explicitly rejected with a clear error.
|
|
134
|
+
- Embeddings, images, audio, and other non-chat/messages endpoints.
|
|
135
|
+
- Persistent spend across process restarts (resets on `SpendGuard()` construction).
|
|
136
|
+
|
|
137
|
+
Persistence and streaming support are planned for v1.0.
|
|
138
|
+
|
|
139
|
+
## Feedback
|
|
140
|
+
|
|
141
|
+
Found a bug or have a feature request? [Open an issue](https://github.com/Rahul-git23/spendguard/issues) — all feedback welcome.
|
|
142
|
+
|
|
143
|
+
## License
|
|
144
|
+
|
|
145
|
+
MIT
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
spendguard/__init__.py,sha256=nKIgYMx1-RYicoJ1PS4ZvWUNxI4t4t-7XJyPOB5_p4U,1279
|
|
2
|
+
spendguard/context.py,sha256=nbxnQ-OX5_LtU_bCjcgedudo259F840K1GPuxL9EC_E,1402
|
|
3
|
+
spendguard/events.py,sha256=cEl-GvgTRM8Y2gLH6tYQi7rCCZztFzhEPLxCJqzfjkc,3879
|
|
4
|
+
spendguard/exceptions.py,sha256=uWxxaeiYMp4rTwtNDr65YX6MFsXpd5q1PfNPVt9tRC4,913
|
|
5
|
+
spendguard/session.py,sha256=1yPoCq52k0znx80xH_LMlGhVs7TheIBx5b0l_5JMgK4,3136
|
|
6
|
+
spendguard/tracker.py,sha256=xwo0e03nyP6roFGo_AIXQhkXfUy_UdFKwrX2Upj4ci0,5584
|
|
7
|
+
spendguard/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
spendguard/config/pricing_anthropic.json,sha256=8O3CK6mjcw60c3FNYHCyAkWotojCnQ6-KIB6vn87pzM,426
|
|
9
|
+
spendguard/config/pricing_openai.json,sha256=9w917MLpp5BA6I_zd_XRFfNqjqOywYft5Rg2T-OBPJA,403
|
|
10
|
+
spendguard/cost/__init__.py,sha256=prdECLVbHNov2aojAxgwNS3AyQ0rG8FpGvmd9FLBMMA,199
|
|
11
|
+
spendguard/cost/calculator.py,sha256=ab3_aGah5PngdF3nTdZGAAEhjv2_xBVp-Ptrt4_Hgw8,757
|
|
12
|
+
spendguard/cost/estimator.py,sha256=joBdbAeeleZeZH8pb9iYT57uMtDWBr0pYOiwKk1gheg,1471
|
|
13
|
+
spendguard/cost/pricing.py,sha256=lwOlTmG06kc99HW2MKc6XK3Rt-OCoqPTMp9C7fCPg0I,2925
|
|
14
|
+
spendguard/providers/__init__.py,sha256=3GRtSd5P1iTHoVKxSP9UAb8t20fpTcv7_qk6GY2KR8Y,200
|
|
15
|
+
spendguard/providers/anthropic_provider.py,sha256=cq9pP0_dPP_crQ4dTSpz21VkB2QD6V1HA9-C1kxyM_4,328
|
|
16
|
+
spendguard/providers/base.py,sha256=-T6N1qBsInQvJl6tPWHz2bgmY5dCoodzJIcpnPYGiLY,1005
|
|
17
|
+
spendguard/providers/openai_provider.py,sha256=TJnEKmhvKi5N9doLmgUzv2t2EuTm6pcEZN9sLerB38c,327
|
|
18
|
+
spendguard/wrappers/__init__.py,sha256=qaLGpTJERDspdbMQqzIWMF_HB08TEgubsL-hC-_qDyk,147
|
|
19
|
+
spendguard/wrappers/_messages.py,sha256=BrpD4vG36J3LEfoh6KuzsRaew8zvJ0g1Ops7Z7Cufv0,535
|
|
20
|
+
spendguard/wrappers/anthropic.py,sha256=kGlxT6fkDivd9RaIziMxK7fZE8Bdn0hLQqMxvfQ94Hs,4328
|
|
21
|
+
spendguard/wrappers/openai.py,sha256=pDLibdu49hIdvoKadU-8EfhpuhaIJ1WhA9sOrpDj8ws,5232
|
|
22
|
+
spendguard-0.1.0.dist-info/METADATA,sha256=vA29iaaCMFdJqq8MGIxwS0wUR1XEdFHyfeZ5WmczXyg,4997
|
|
23
|
+
spendguard-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
24
|
+
spendguard-0.1.0.dist-info/RECORD,,
|