spendguard 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spendguard/__init__.py ADDED
@@ -0,0 +1,36 @@
1
+ """spendguard -- blocks an over-budget LLM API call before it happens.
2
+
3
+ Build status: Stage 4 (core platform build), matching README.md's quickstart.
4
+ SpendGuard.wrap_openai() / wrap_anthropic() / track() are implemented and
5
+ gate client.chat.completions.create() / client.messages.create() respectively
6
+ -- every other client attribute (embeddings, models, ...) and streaming calls
7
+ (stream=True) are explicitly out of scope for this MVP wrapper, not silently
8
+ mishandled. Pricing data in config/ is placeholder, not verified current
9
+ rates -- see cost/pricing.py.
10
+ """
11
+ from .exceptions import BudgetError, BudgetExceededError, PricingDataError
12
+ from .tracker import SpendTracker
13
+ from .cost import CostCalculator, CostEstimator, ModelPrice, PricingTable
14
+ from .providers import AnthropicProvider, OpenAIProvider, Provider, Usage
15
+ from .session import SpendGuard
16
+ from .wrappers import AnthropicClientWrapper, OpenAIClientWrapper
17
+
18
+ __version__ = "0.1.0"
19
+
20
+ __all__ = [
21
+ "SpendGuard",
22
+ "SpendTracker",
23
+ "BudgetError",
24
+ "BudgetExceededError",
25
+ "PricingDataError",
26
+ "CostCalculator",
27
+ "CostEstimator",
28
+ "ModelPrice",
29
+ "PricingTable",
30
+ "Provider",
31
+ "Usage",
32
+ "OpenAIProvider",
33
+ "AnthropicProvider",
34
+ "OpenAIClientWrapper",
35
+ "AnthropicClientWrapper",
36
+ ]
File without changes
@@ -0,0 +1,7 @@
1
+ {
2
+ "_note": "Pricing last verified 2026-06-25 against anthropic.com/pricing. Update _version_date and rates when Anthropic publishes a price change.",
3
+ "_version_date": "2026-06-25",
4
+ "claude-haiku-4-5": {"input_per_million": 1.00, "output_per_million": 5.00},
5
+ "claude-sonnet-4-6": {"input_per_million": 3.00, "output_per_million": 15.00},
6
+ "claude-opus-4-6": {"input_per_million": 15.00, "output_per_million": 75.00}
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "_note": "Pricing last verified 2026-06-25 against openai.com/api/pricing. Update _version_date and rates when OpenAI publishes a price change.",
3
+ "_version_date": "2026-06-25",
4
+ "gpt-4o-mini": {"input_per_million": 0.15, "output_per_million": 0.60},
5
+ "gpt-4o": {"input_per_million": 2.50, "output_per_million": 10.00},
6
+ "gpt-4.1-mini": {"input_per_million": 0.40, "output_per_million": 1.60}
7
+ }
spendguard/context.py ADDED
@@ -0,0 +1,46 @@
1
+ """Thread-local override state shared between SpendGuard.track() and the
2
+ provider wrappers it gates.
3
+
4
+ `with guard.track(override=True):` has to affect only calls made on the
5
+ current thread inside that block, not every thread sharing the same
6
+ SpendGuard -- otherwise one thread's override would silently apply to
7
+ another's concurrent call.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import threading
12
+
13
+
14
class OverrideContext:
    """Per-thread stack of override flags.

    The stack lives on a ``threading.local``, so a flag pushed by one thread
    is never visible to another thread sharing the same instance.
    """

    def __init__(self) -> None:
        self._local = threading.local()

    def push(self, override: bool) -> None:
        """Push ``override`` for the calling thread, creating its stack on first use."""
        frames = getattr(self._local, "stack", None)
        if frames is None:
            frames = self._local.stack = []
        frames.append(override)

    def pop(self) -> None:
        """Discard the calling thread's most recently pushed flag."""
        frames = self._local.stack
        frames.pop()

    def current(self) -> bool:
        """Return the calling thread's innermost flag, or False when nothing is pushed."""
        frames = getattr(self._local, "stack", None)
        if not frames:
            return False
        return frames[-1]
31
+
32
+
33
class TrackContext:
    """Context manager handed out by SpendGuard.track() -- see README.md's "Overriding a block on purpose".

    Entering pushes the requested override flag onto the shared
    OverrideContext; leaving pops it again.
    """

    def __init__(self, override_context: OverrideContext, override: bool) -> None:
        self._override_context, self._override = override_context, override

    def __enter__(self) -> "TrackContext":
        self._override_context.push(self._override)
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:
        self._override_context.pop()
        # False: an exception raised inside the block is never swallowed.
        return False
@@ -0,0 +1,5 @@
1
+ from .pricing import ModelPrice, PricingTable
2
+ from .estimator import CostEstimator
3
+ from .calculator import CostCalculator
4
+
5
+ __all__ = ["ModelPrice", "PricingTable", "CostEstimator", "CostCalculator"]
@@ -0,0 +1,21 @@
1
+ """CostCalculator -- turns real, post-call token usage into an actual dollar cost.
2
+
3
+ Always the source of truth recorded into SpendTracker.commit() -- never the
4
+ pre-call estimate, once the provider's real usage numbers are known.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from ..providers.base import Usage
9
+ from .pricing import PricingTable
10
+
11
+
12
class CostCalculator:
    """Prices real, post-call token usage using a PricingTable."""

    def __init__(self, pricing: PricingTable) -> None:
        self._pricing = pricing

    def actual_cost_usd(self, provider: str, model: str, usage: Usage) -> float:
        """Return the dollar cost of ``usage`` at ``model``'s per-million-token rates.

        Whatever ``PricingTable.get_price`` raises for an unresolvable
        provider/model propagates unchanged.
        """
        rates = self._pricing.get_price(provider, model)
        input_cost = usage.input_tokens / 1_000_000 * rates.input_per_million
        output_cost = usage.output_tokens / 1_000_000 * rates.output_per_million
        return input_cost + output_cost
@@ -0,0 +1,45 @@
1
+ """CostEstimator -- pre-call cost estimate from a prompt and max output size.
2
+
3
+ Zero required dependency: input tokens are approximated at ~4 characters per
4
+ token unless tiktoken is installed (pip install spendguard[tiktoken]), in
5
+ which case OpenAI prompts get exact cl100k_base counts. The estimate only has
6
+ to be close enough to gate correctly -- CostCalculator always recomputes the
7
+ real cost from the provider's own usage numbers after the call resolves.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from .pricing import PricingTable
12
+
13
CHARS_PER_TOKEN_APPROX = 4

try:
    import tiktoken

    _ENCODING = tiktoken.get_encoding("cl100k_base")
except Exception:
    # tiktoken is optional. Besides ImportError, loading the encoding itself
    # can fail (e.g. its data cannot be fetched or read); that must never
    # break `import spendguard`, so fall back to the character heuristic.
    _ENCODING = None


def _count_input_tokens(prompt_text: str, provider: str) -> int:
    """Return an input-token count for ``prompt_text``, never less than 1.

    OpenAI prompts are counted with tiktoken's cl100k_base encoding when it
    loaded successfully; everything else uses ``CHARS_PER_TOKEN_APPROX``
    characters per token (floor division).
    """
    if _ENCODING is not None and provider == "openai":
        # disallowed_special=() encodes special-token text such as
        # "<|endoftext|>" as ordinary text. With tiktoken's default, a prompt
        # that merely contains that text raises ValueError before the gate runs.
        return max(1, len(_ENCODING.encode(prompt_text, disallowed_special=())))
    return max(1, len(prompt_text) // CHARS_PER_TOKEN_APPROX)
27
+
28
+
29
class CostEstimator:
    """Pre-call estimator: counted input tokens plus the caller's output cap, priced via a PricingTable."""

    def __init__(self, pricing: PricingTable) -> None:
        self._pricing = pricing

    def estimate_usd(
        self,
        provider: str,
        model: str,
        prompt_text: str,
        max_output_tokens: int,
    ) -> float:
        """Return the estimated dollar cost of one call.

        The output side is priced as if all ``max_output_tokens`` were
        produced. The price lookup happens first, so an unresolvable
        provider/model fails before any token counting.
        """
        rates = self._pricing.get_price(provider, model)
        prompt_tokens = _count_input_tokens(prompt_text, provider)
        input_cost = prompt_tokens / 1_000_000 * rates.input_per_million
        output_cost = max_output_tokens / 1_000_000 * rates.output_per_million
        return input_cost + output_cost
@@ -0,0 +1,77 @@
1
+ """PricingTable -- loads per-provider model pricing from config/pricing_<provider>.json.
2
+
3
+ Adding a new provider's prices later is a new config/pricing_<provider>.json
4
+ file, not a code change here. Keys starting with "_" (e.g. "_note") are
5
+ metadata, not models, and are skipped when loading.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ import warnings
12
+ from dataclasses import dataclass
13
+ from datetime import date, datetime
14
+ from pathlib import Path
15
+ from typing import Dict, Optional
16
+
17
+ from ..exceptions import PricingDataError
18
+
19
+ _STALENESS_DAYS = 90
20
+ _log = logging.getLogger(__name__)
21
+
22
+ DEFAULT_CONFIG_DIR = Path(__file__).resolve().parent.parent / "config"
23
+
24
+
25
@dataclass(frozen=True)
class ModelPrice:
    """Immutable rates for one model, each quoted per one million tokens."""

    # Rate applied to input tokens.
    input_per_million: float
    # Rate applied to output tokens.
    output_per_million: float
29
+
30
+
31
class PricingTable:
    """Lazily loads and caches per-provider model prices from ``pricing_<provider>.json``.

    Every failure to resolve a price -- missing file, unparseable or
    mis-shaped config, an entry without usable rates, an unknown model --
    surfaces as PricingDataError, so callers need only one except clause.
    """

    def __init__(self, config_dir: Optional[Path] = None) -> None:
        self._config_dir = config_dir if config_dir is not None else DEFAULT_CONFIG_DIR
        self._cache: Dict[str, Dict[str, ModelPrice]] = {}

    def _load_provider(self, provider: str) -> Dict[str, ModelPrice]:
        """Return the model -> ModelPrice mapping for ``provider``, loading it once.

        Keys starting with "_" are metadata and are skipped. A ``_version_date``
        older than ``_STALENESS_DAYS`` triggers a warning; an unparseable one
        is only logged at debug level.

        Raises:
            PricingDataError: the config file is missing, cannot be parsed,
                is not a JSON object, or contains a malformed model entry.
        """
        if provider in self._cache:
            return self._cache[provider]

        path = self._config_dir / f"pricing_{provider}.json"
        if not path.exists():
            raise PricingDataError(f"no pricing config for provider '{provider}' (looked for {path})")

        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
        except ValueError as err:
            # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
            raise PricingDataError(f"pricing config {path} could not be parsed: {err}") from err
        if not isinstance(raw, dict):
            raise PricingDataError(f"pricing config {path} must be a JSON object keyed by model name")

        version_date_str = raw.get("_version_date")
        if version_date_str:
            try:
                version_date = datetime.strptime(version_date_str, "%Y-%m-%d").date()
                age_days = (date.today() - version_date).days
                if age_days > _STALENESS_DAYS:
                    warnings.warn(
                        f"SpendGuard: {provider} pricing data is {age_days} days old "
                        f"(last verified {version_date_str}). Cost estimates may be inaccurate "
                        f"if {provider} has changed their prices. Update config/pricing_{provider}.json "
                        f"or pass a custom config_dir to PricingTable().",
                        # Frames: _load_provider (1) -> get_price (2) -> its caller (3).
                        stacklevel=3,
                    )
            except (ValueError, TypeError):
                # TypeError covers a non-string _version_date (e.g. a bare number).
                _log.debug("Could not parse _version_date '%s' in pricing_%s.json", version_date_str, provider)

        prices: Dict[str, ModelPrice] = {}
        for model, entry in raw.items():
            if model.startswith("_"):
                continue
            try:
                prices[model] = ModelPrice(
                    input_per_million=float(entry["input_per_million"]),
                    output_per_million=float(entry["output_per_million"]),
                )
            except (KeyError, TypeError, ValueError) as err:
                raise PricingDataError(
                    f"malformed pricing entry for model '{model}' in {path}: "
                    f"expected numeric 'input_per_million' and 'output_per_million'"
                ) from err

        self._cache[provider] = prices
        return prices

    def get_price(self, provider: str, model: str) -> ModelPrice:
        """Return the ModelPrice for ``model``; raise PricingDataError if it is not configured."""
        prices = self._load_provider(provider)
        if model not in prices:
            raise PricingDataError(f"unknown model '{model}' for provider '{provider}'")
        return prices[model]
spendguard/events.py ADDED
@@ -0,0 +1,112 @@
1
+ """Local spend-event log -- zero-backend audit trail.
2
+
3
+ release-gates.md's Observability readiness gate requires "a way to tell,
4
+ after the fact, what happened during a failed run." For an SDK with no
5
+ backend and no account, that has to be a local file: one JSON line per
6
+ gated call, recording what was estimated, what was decided, what it took,
7
+ and what was actually spent. This is also the file a future V1 Ingestion API
8
+ would read from (prd.md's Dependency Map) -- spendguard doesn't need that
9
+ API to exist yet, it just needs to not lose the data that API will
10
+ eventually want.
11
+
12
+ Rotation: release-gates.md's cost-model gate requires "logging volume...
13
+ has an explicit limit or sampling rule" -- an unbounded append-only file
14
+ would otherwise grow forever for a high-call-volume user (the exact
15
+ runaway-agent scenario this product exists to catch). max_bytes bounds disk
16
+ usage to roughly 2x its value: one rotated file is kept (events.jsonl.1),
17
+ the previous rotation is discarded, never an unbounded history.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import threading
23
+ from dataclasses import asdict, dataclass
24
+ from datetime import datetime, timezone
25
+ from pathlib import Path
26
+ from typing import Optional
27
+
28
+ DEFAULT_EVENT_LOG_PATH = Path(".spendguard") / "events.jsonl"
29
+ DEFAULT_MAX_BYTES = 10 * 1024 * 1024 # 10 MB
30
+
31
+
32
def _now_iso() -> str:
    """Return the current UTC time as a second-resolution ``YYYY-MM-DDTHH:MM:SSZ`` string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")
34
+
35
+
36
@dataclass(frozen=True)
class SpendEvent:
    """One immutable record in the local spend-event log."""

    # Timestamp string for the event.
    at: str
    workspace: str
    provider: str
    model: str
    # One of: "allowed" | "allowed_with_override" | "blocked" | "rolled_back"
    decision: str
    estimated_cost_usd: float
    # The remaining fields are optional and default to None.
    actual_cost_usd: Optional[float] = None
    reservation_id: Optional[str] = None
    latency_ms: Optional[float] = None
47
+
48
+
49
class EventLog:
    """Thread-safe append-only JSONL writer, with size-based rotation.

    Output only -- spendguard never reads this back to make a decision in the
    MVP. The lock here is separate from SpendTracker's: it protects the file
    write, not the budget arithmetic.
    """

    def __init__(self, path: Path, max_bytes: Optional[int] = DEFAULT_MAX_BYTES) -> None:
        self._path = path
        # None disables rotation entirely.
        self._max_bytes = max_bytes
        self._lock = threading.Lock()

    @property
    def path(self) -> Path:
        """Location of the live (un-rotated) log file."""
        return self._path

    def _rotate_if_needed(self, incoming_bytes: int) -> None:
        """Move the live file to ``<name>.1`` if appending would exceed ``max_bytes``.

        Must be called with ``self._lock`` held. Only one rotated file is
        kept; any previous ``<name>.1`` is overwritten.
        """
        if self._max_bytes is None:
            return
        try:
            current_size = self._path.stat().st_size
        except FileNotFoundError:
            # Nothing written yet, so there is nothing to rotate.
            return
        if current_size + incoming_bytes <= self._max_bytes:
            return
        rotated = self._path.with_name(self._path.name + ".1")
        # Path.replace overwrites an existing target in a single step on every
        # platform, so no separate exists()/unlink() is needed and there is no
        # window in which the previous rotation is gone but the new one is not
        # yet in place.
        self._path.replace(rotated)

    def append(self, event: SpendEvent) -> None:
        """Serialize ``event`` as one JSON line and append it, rotating first if required."""
        # Serialization touches no shared state, so keep it outside the lock.
        line = json.dumps(asdict(event)) + "\n"
        with self._lock:
            self._path.parent.mkdir(parents=True, exist_ok=True)
            self._rotate_if_needed(len(line.encode("utf-8")))
            with self._path.open("a", encoding="utf-8") as f:
                f.write(line)

    def record(
        self,
        *,
        workspace: str,
        provider: str,
        model: str,
        decision: str,
        estimated_cost_usd: float,
        actual_cost_usd: Optional[float] = None,
        reservation_id: Optional[str] = None,
        latency_ms: Optional[float] = None,
    ) -> None:
        """Build a SpendEvent stamped with the current UTC time and append it."""
        self.append(
            SpendEvent(
                at=_now_iso(),
                workspace=workspace,
                provider=provider,
                model=model,
                decision=decision,
                estimated_cost_usd=estimated_cost_usd,
                actual_cost_usd=actual_cost_usd,
                reservation_id=reservation_id,
                latency_ms=latency_ms,
            )
        )
@@ -0,0 +1,31 @@
1
+ """Exceptions raised by spendguard's budget enforcement."""
2
+ from __future__ import annotations
3
+
4
+
5
class BudgetError(Exception):
    """Base class for all spendguard budget errors."""


class BudgetExceededError(BudgetError):
    """Raised when a call's estimated cost would cross the ceiling's threshold.

    Carries the numbers that caused the block so a caller (or its except clause)
    can report something more useful than the message string alone.

    Attributes set by ``__init__``: ``estimated_cost``, ``remaining`` and
    ``threshold_usd``.
    """

    def __init__(
        self,
        message: str,
        *,
        estimated_cost: float,
        remaining: float,
        threshold_usd: float,
    ) -> None:
        super().__init__(message)
        self.estimated_cost = estimated_cost
        self.remaining = remaining
        self.threshold_usd = threshold_usd

    @classmethod
    def _rebuild(cls, args, estimated_cost, remaining, threshold_usd):
        """Reconstruct an instance from the values captured by ``__reduce__``."""
        return cls(
            *args,
            estimated_cost=estimated_cost,
            remaining=remaining,
            threshold_usd=threshold_usd,
        )

    def __reduce__(self):
        # The default exception reduction re-creates the object as
        # cls(*self.args), which fails here because the three numeric fields
        # are required keyword-only arguments. Without this override the error
        # cannot be pickled or copied (e.g. when crossing a process boundary).
        return (
            type(self)._rebuild,
            (self.args, self.estimated_cost, self.remaining, self.threshold_usd),
            dict(self.__dict__),
        )
28
+
29
+
30
class PricingDataError(BudgetError):
    """Signals that pricing for a model could not be resolved -- e.g. the model is unknown or the config is bad."""
@@ -0,0 +1,5 @@
1
+ from .base import Provider, Usage
2
+ from .openai_provider import OpenAIProvider
3
+ from .anthropic_provider import AnthropicProvider
4
+
5
+ __all__ = ["Provider", "Usage", "OpenAIProvider", "AnthropicProvider"]
@@ -0,0 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from .base import Provider, Usage
6
+
7
+
8
class AnthropicProvider(Provider):
    """Provider that reads ``usage.input_tokens`` / ``usage.output_tokens`` off a response."""

    name = "anthropic"

    def extract_usage(self, response: Any) -> Usage:
        """Return the token counts reported on ``response.usage``."""
        reported = response.usage
        return Usage(
            input_tokens=reported.input_tokens,
            output_tokens=reported.output_tokens,
        )
@@ -0,0 +1,31 @@
1
+ """Provider abstraction -- the seam a new LLM provider plugs into.
2
+
3
+ Adding a provider beyond OpenAI/Anthropic (Gemini, Perplexity, etc. -- out of
4
+ scope today per product-thesis.md's non-goals, but the reason this seam
5
+ exists) means implementing extract_usage() below plus a pricing config file.
6
+ SpendTracker, CostEstimator, and CostCalculator never reference a provider's
7
+ SDK or response shape directly -- they only see Usage and dollar amounts.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from abc import ABC, abstractmethod
12
+ from dataclasses import dataclass
13
+ from typing import Any
14
+
15
+
16
@dataclass(frozen=True)
class Usage:
    """Token counts for a single completed call, independent of any provider's response shape."""

    # Tokens consumed by the request.
    input_tokens: int
    # Tokens produced in the response.
    output_tokens: int
22
+
23
+
24
class Provider(ABC):
    """Adapter interface: turn one provider SDK's response object into a Usage."""

    # Provider identifier; concrete subclasses assign it as a class attribute.
    name: str

    @abstractmethod
    def extract_usage(self, response: Any) -> Usage:
        """Return the real token counts carried by a completed API response."""
@@ -0,0 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from .base import Provider, Usage
6
+
7
+
8
class OpenAIProvider(Provider):
    """Provider that reads ``usage.prompt_tokens`` / ``usage.completion_tokens`` off a response."""

    name = "openai"

    def extract_usage(self, response: Any) -> Usage:
        """Return the token counts reported on ``response.usage``."""
        reported = response.usage
        return Usage(
            input_tokens=reported.prompt_tokens,
            output_tokens=reported.completion_tokens,
        )
spendguard/session.py ADDED
@@ -0,0 +1,85 @@
1
+ """SpendGuard -- the public entry point described in README.md's quickstart.
2
+
3
+ wrap_openai() / wrap_anthropic() are the primary integration pattern: wrap
4
+ the client once at construction, then call it exactly like the real client.
5
+ track() is the escape hatch for README.md's "Overriding a block on purpose".
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import Any, Optional
11
+
12
+ from .context import OverrideContext, TrackContext
13
+ from .cost.calculator import CostCalculator
14
+ from .cost.estimator import CostEstimator
15
+ from .cost.pricing import PricingTable
16
+ from .events import DEFAULT_EVENT_LOG_PATH, DEFAULT_MAX_BYTES, EventLog
17
+ from .tracker import DEFAULT_THRESHOLD_PCT, SpendTracker
18
+ from .wrappers.anthropic import AnthropicClientWrapper
19
+ from .wrappers.openai import OpenAIClientWrapper
20
+
21
+
22
class SpendGuard:
    """Owns one tracker, one pricing table and one override context, and wraps clients with them.

    Every wrapper created from the same SpendGuard shares the same
    SpendTracker, so their calls count against a single budget.
    """

    def __init__(
        self,
        workspace: str,
        ceiling_usd: float,
        threshold_pct: float = DEFAULT_THRESHOLD_PCT,
        pricing_config_dir: Optional[Path] = None,
        event_log_path: Optional[Path] = None,
        event_log_max_bytes: Optional[int] = DEFAULT_MAX_BYTES,
        log_events: bool = True,
    ) -> None:
        self.workspace = workspace
        self._tracker = SpendTracker(ceiling_usd, threshold_pct)

        # One PricingTable feeds both the pre-call estimator and the post-call calculator.
        pricing = PricingTable(config_dir=pricing_config_dir)
        self._estimator = CostEstimator(pricing)
        self._calculator = CostCalculator(pricing)

        self._override_context = OverrideContext()

        if log_events:
            log_path = DEFAULT_EVENT_LOG_PATH if event_log_path is None else event_log_path
            self._event_log = EventLog(log_path, max_bytes=event_log_max_bytes)
        else:
            self._event_log = None

    def _gate_components(self) -> tuple:
        """Positional arguments (after the client) shared by both wrapper constructors."""
        return (
            self._tracker,
            self._estimator,
            self._calculator,
            self._override_context,
            self.workspace,
            self._event_log,
        )

    def wrap_openai(self, client: Any) -> OpenAIClientWrapper:
        """Return ``client`` wrapped in an OpenAIClientWrapper bound to this guard's budget."""
        return OpenAIClientWrapper(client, *self._gate_components())

    def wrap_anthropic(self, client: Any) -> AnthropicClientWrapper:
        """Return ``client`` wrapped in an AnthropicClientWrapper bound to this guard's budget."""
        return AnthropicClientWrapper(client, *self._gate_components())

    def track(self, model: Optional[str] = None, override: bool = False) -> TrackContext:
        """Escape hatch for one call -- see README.md's "Overriding a block on purpose".

        `model` is accepted for forward-compatible per-call labeling (planned
        for the V1.1 event log) but has no effect yet; `override` is the only
        thing this currently does.
        """
        return TrackContext(self._override_context, override)

    def get_summary(self) -> dict:
        """Return the tracker's summary dict unchanged."""
        return self._tracker.get_summary()

    @property
    def event_log_path(self) -> Optional[Path]:
        """Where local spend events are being written, or None if logging is disabled."""
        if self._event_log is None:
            return None
        return self._event_log.path
spendguard/tracker.py ADDED
@@ -0,0 +1,131 @@
1
+ """SpendTracker — thread-safe ceiling enforcement with atomic reservation.
2
+
3
+ Per README.md's "How the ceiling actually works" and prd.md's matching workflow,
4
+ spendguard mirrors runtime/cost_ledger.py's hard rule: a call is blocked once
5
+ cumulative spend plus its estimate would cross 25% of the founder-set ceiling.
6
+
7
+ cost_ledger.py checks that rule against spend already recorded, which is safe for
8
+ a single sequential caller but not for concurrent ones -- two calls firing at the
9
+ same instant can each see room under the threshold and both proceed, together
10
+ overshooting it. That gap is exactly the failure mode this product's own pitch
11
+ describes (a fan-out or retry loop spending faster than a check-then-record
12
+ ledger can track) and is why the PRD now requires this tracker to make the
13
+ check-and-reserve step atomic: estimated cost is reserved against the threshold
14
+ under a single lock *before* a call is allowed to proceed, then released and
15
+ converted to real spend (commit) or discarded (rollback) once the call resolves.
16
+ At every point in time, spent + reserved <= threshold.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import threading
21
+ import uuid
22
+ from typing import Dict
23
+
24
+ from .exceptions import BudgetExceededError
25
+
26
DEFAULT_THRESHOLD_PCT = 0.25


class SpendTracker:
    """Tracks spend and in-flight reservations against a fraction of a ceiling.

    The blocking threshold is ``ceiling_usd * threshold_pct`` (25% by
    default). All mutations happen under one lock, and every amount entering
    the ledger is validated first: a NaN or negative amount would otherwise
    slip past the ``>`` comparison and silently disable the gate.
    """

    def __init__(self, ceiling_usd: float, threshold_pct: float = DEFAULT_THRESHOLD_PCT) -> None:
        """Create a tracker.

        Raises:
            ValueError: ``ceiling_usd`` is NaN or negative, or ``threshold_pct``
                is not in the interval (0, 1].
        """
        if ceiling_usd != ceiling_usd:
            # NaN never compares less than 0, so it needs its own check; a NaN
            # threshold would make every later comparison False.
            raise ValueError("ceiling_usd cannot be NaN")
        if ceiling_usd < 0:
            raise ValueError("ceiling_usd cannot be negative")
        if not 0 < threshold_pct <= 1:
            raise ValueError("threshold_pct must be between 0 and 1")

        self._ceiling = float(ceiling_usd)
        self._threshold_pct = threshold_pct
        self._threshold = self._ceiling * threshold_pct
        self._spent = 0.0
        self._reserved = 0.0
        self._reservations: Dict[str, float] = {}
        self._lock = threading.Lock()

    @staticmethod
    def _require_valid_amount(amount: float, label: str) -> None:
        """Raise ValueError unless ``amount`` is a finite number >= 0."""
        # The chained comparison is False for NaN, negatives and +inf alike.
        if not 0 <= amount < float("inf"):
            raise ValueError(f"{label} must be a finite, non-negative number, got {amount!r}")

    def _release(self, reserved_amount: float) -> None:
        """Subtract a released reservation from the running total (lock must be held)."""
        self._reserved -= reserved_amount
        if not self._reservations:
            # Nothing in flight: snap to exactly zero so float rounding from
            # many add/subtract cycles cannot accumulate.
            self._reserved = 0.0

    def check_and_reserve(self, estimated_cost: float, override: bool = False) -> str:
        """Atomically check the threshold and reserve funds for a call.

        Raises BudgetExceededError if the estimate would cross the threshold,
        unless override=True -- an explicit override still reserves (and will
        still show up as spend once committed), it just skips the block.

        Raises ValueError, before touching any state, if ``estimated_cost`` is
        NaN, negative or infinite.

        Returns a reservation ID to pass to commit() or rollback().
        """
        self._require_valid_amount(estimated_cost, "estimated_cost")
        with self._lock:
            remaining = self._threshold - self._spent - self._reserved
            if estimated_cost > remaining and not override:
                raise BudgetExceededError(
                    f"Estimated cost ${estimated_cost:.6f} would exceed the remaining "
                    f"${remaining:.6f} under the ${self._threshold:.2f} threshold "
                    f"({self._threshold_pct:.0%} of ${self._ceiling:.2f} ceiling)",
                    estimated_cost=estimated_cost,
                    remaining=remaining,
                    threshold_usd=self._threshold,
                )

            reservation_id = str(uuid.uuid4())
            self._reserved += estimated_cost
            self._reservations[reservation_id] = estimated_cost
            return reservation_id

    def commit(self, reservation_id: str, actual_cost: float) -> None:
        """Release a reservation and record the real cost once a call succeeds.

        Raises:
            ValueError: ``actual_cost`` is NaN, negative or infinite. The
                reservation is left untouched so the caller can still roll
                it back or commit a corrected amount.
            KeyError: ``reservation_id`` is unknown or already resolved.
        """
        self._require_valid_amount(actual_cost, "actual_cost")
        with self._lock:
            try:
                reserved_amount = self._reservations.pop(reservation_id)
            except KeyError:
                raise KeyError(f"no such reservation: {reservation_id}") from None
            self._release(reserved_amount)
            self._spent += actual_cost

    def rollback(self, reservation_id: str) -> None:
        """Release a reservation with no spend recorded, e.g. after a failed call.

        Idempotent -- rolling back a reservation that's already gone is a no-op,
        not an error, since a caller's cleanup path may run more than once.
        """
        with self._lock:
            reserved_amount = self._reservations.pop(reservation_id, None)
            if reserved_amount is not None:
                self._release(reserved_amount)

    def get_ceiling_usd(self) -> float:
        """Return the ceiling passed at construction."""
        return self._ceiling

    def get_threshold_usd(self) -> float:
        """Return the blocking threshold (ceiling * threshold_pct)."""
        return self._threshold

    def get_spent(self) -> float:
        """Return the total committed spend."""
        with self._lock:
            return self._spent

    def get_reserved(self) -> float:
        """Return the total currently reserved by in-flight calls."""
        with self._lock:
            return self._reserved

    def get_remaining(self) -> float:
        """Room left under the threshold, accounting for in-flight reservations."""
        with self._lock:
            return self._threshold - self._spent - self._reserved

    def get_summary(self) -> dict:
        """Return a point-in-time snapshot of ceiling, threshold, spend and utilization."""
        with self._lock:
            spent, reserved = self._spent, self._reserved
        utilization = (spent + reserved) / self._threshold * 100 if self._threshold > 0 else 0.0
        return {
            "ceiling_usd": self._ceiling,
            "threshold_pct": self._threshold_pct,
            "threshold_usd": self._threshold,
            "spent": spent,
            "reserved": reserved,
            "remaining": self._threshold - spent - reserved,
            "utilization_percent": utilization,
        }

    def reset(self) -> None:
        """Reset spent and reservations to zero. Does not cancel in-flight calls --
        only call this when certain nothing is pending."""
        with self._lock:
            self._spent = 0.0
            self._reserved = 0.0
            self._reservations.clear()
@@ -0,0 +1,4 @@
1
+ from .openai import OpenAIClientWrapper
2
+ from .anthropic import AnthropicClientWrapper
3
+
4
+ __all__ = ["OpenAIClientWrapper", "AnthropicClientWrapper"]
@@ -0,0 +1,15 @@
1
+ """Shared helper: flatten a chat-style messages list into plain text for the
2
+ pre-call token estimate. OpenAI's and Anthropic's message dicts use the same
3
+ {"role": ..., "content": "..."} shape for simple text content.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ from typing import Iterable
8
+
9
+
10
def flatten_messages(messages: Iterable[dict]) -> str:
    """Join every message's ``content`` with newlines, in order.

    String content is used as-is; any other content value (including a
    missing key, which counts as an empty string) is passed through ``str()``.
    """
    contents = (message.get("content", "") for message in messages)
    return "\n".join(item if isinstance(item, str) else str(item) for item in contents)
@@ -0,0 +1,118 @@
1
+ """AnthropicClientWrapper -- drop-in wrap of an anthropic.Anthropic() client.
2
+
3
+ Gates client.messages.create() only; everything else delegates straight
4
+ through, ungated -- same MVP scope boundary as OpenAIClientWrapper. Streaming
5
+ is rejected outright for the same reason: usage can't be reliably read off a
6
+ streamed response in this version.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import time
11
+ from typing import Any, List, Optional
12
+
13
+ from ..context import OverrideContext
14
+ from ..cost.calculator import CostCalculator
15
+ from ..cost.estimator import CostEstimator
16
+ from ..events import EventLog
17
+ from ..exceptions import BudgetExceededError
18
+ from ..providers.anthropic_provider import AnthropicProvider
19
+ from ..tracker import SpendTracker
20
+ from ._messages import flatten_messages
21
+
22
+
23
class _GatedMessages:
    """Budget-gated stand-in for ``client.messages``; only ``create()`` is provided."""

    def __init__(
        self,
        real_messages: Any,
        tracker: SpendTracker,
        estimator: CostEstimator,
        calculator: CostCalculator,
        override_context: OverrideContext,
        workspace: str,
        event_log: Optional[EventLog],
    ) -> None:
        self._real = real_messages
        self._tracker = tracker
        self._estimator = estimator
        self._calculator = calculator
        self._override_context = override_context
        self._workspace = workspace
        self._event_log = event_log
        self._provider = AnthropicProvider()

    def _log(
        self, model: str, decision: str, estimated_cost: float, actual_cost=None, reservation_id=None, latency_ms=None
    ) -> None:
        """Record one event if an event log is configured; otherwise do nothing."""
        if self._event_log is not None:
            self._event_log.record(
                workspace=self._workspace,
                provider="anthropic",
                model=model,
                decision=decision,
                estimated_cost_usd=estimated_cost,
                actual_cost_usd=actual_cost,
                reservation_id=reservation_id,
                latency_ms=latency_ms,
            )

    def create(self, *, model: str, max_tokens: int, messages: List[dict], **kwargs: Any) -> Any:
        """Gate, forward and account for one ``messages.create()`` call.

        Flow: estimate -> check_and_reserve -> real call -> commit the actual
        cost (or roll back if the real call raised).

        Raises:
            NotImplementedError: ``stream=True`` was requested.
            BudgetExceededError: the estimate would cross the threshold and no
                override is active on this thread.
            Exception: anything the underlying client, usage extraction or
                pricing raises is re-raised after the reservation is resolved.
        """
        if kwargs.get("stream"):
            raise NotImplementedError(
                "spendguard does not support stream=True yet -- usage can't be "
                "reliably extracted from a streamed response in this version."
            )

        prompt_text = flatten_messages(messages)
        system = kwargs.get("system")
        if system is not None:
            # The system prompt is passed as a top-level argument rather than
            # inside `messages`, but it is still input sent with the request;
            # leaving it out would under-estimate the call.
            prompt_text = flatten_messages([{"content": system}]) + "\n" + prompt_text
        estimated_cost = self._estimator.estimate_usd("anthropic", model, prompt_text, max_tokens)

        override = self._override_context.current()
        try:
            reservation_id = self._tracker.check_and_reserve(estimated_cost, override=override)
        except BudgetExceededError:
            self._log(model, "blocked", estimated_cost)
            raise

        started_at = time.monotonic()
        try:
            response = self._real.create(model=model, max_tokens=max_tokens, messages=messages, **kwargs)
        except Exception:
            latency_ms = (time.monotonic() - started_at) * 1000
            self._tracker.rollback(reservation_id)
            self._log(model, "rolled_back", estimated_cost, reservation_id=reservation_id, latency_ms=latency_ms)
            raise
        latency_ms = (time.monotonic() - started_at) * 1000
        decision = "allowed_with_override" if override else "allowed"

        try:
            usage = self._provider.extract_usage(response)
            actual_cost = self._calculator.actual_cost_usd("anthropic", model, usage)
        except Exception:
            # The provider call already completed, so the reservation must not
            # be left dangling (it would shrink the budget forever). The real
            # cost is unknown here; charge the estimate as the conservative
            # stand-in and log the event with no actual cost.
            self._tracker.commit(reservation_id, estimated_cost)
            self._log(model, decision, estimated_cost, reservation_id=reservation_id, latency_ms=latency_ms)
            raise

        self._tracker.commit(reservation_id, actual_cost)
        self._log(
            model,
            decision,
            estimated_cost,
            actual_cost,
            reservation_id,
            latency_ms,
        )
        return response
97
+
98
+
99
class AnthropicClientWrapper:
    """Wraps a sync anthropic.Anthropic() client. Async clients are not wrapped yet.

    ``messages`` is replaced by a gated object; every other attribute is
    looked up on the wrapped client via ``__getattr__``.
    """

    def __init__(
        self,
        client: Any,
        tracker: SpendTracker,
        estimator: CostEstimator,
        calculator: CostCalculator,
        override_context: OverrideContext,
        workspace: str = "default",
        event_log: Optional[EventLog] = None,
    ) -> None:
        self._client = client
        self.messages = _GatedMessages(
            client.messages, tracker, estimator, calculator, override_context, workspace, event_log
        )

    def __getattr__(self, name: str) -> Any:
        """Delegate attributes not found on the wrapper to the wrapped client.

        Raises AttributeError for ``_client`` itself: ``__getattr__`` only runs
        when normal lookup failed, so reaching here with that name means the
        instance was never initialized (e.g. while being copied or unpickled).
        Delegating would re-enter this method without end.
        """
        if name == "_client":
            raise AttributeError(name)
        return getattr(self._client, name)
@@ -0,0 +1,147 @@
1
+ """OpenAIClientWrapper -- drop-in wrap of an openai.OpenAI() client.
2
+
3
+ Gates client.chat.completions.create() only; every other attribute on the
4
+ client (embeddings, models, files, ...) delegates straight through, ungated.
5
+ That's a deliberate MVP scope boundary, not an oversight -- see README.md's
6
+ "Out of scope for v0.1". Streaming (stream=True) is rejected outright
7
+ rather than silently mis-tracked, since usage can't be reliably read off a
8
+ streamed response in this version.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import time
13
+ from typing import Any, List, Optional
14
+
15
+ from ..context import OverrideContext
16
+ from ..cost.calculator import CostCalculator
17
+ from ..cost.estimator import CostEstimator
18
+ from ..events import EventLog
19
+ from ..providers.openai_provider import OpenAIProvider
20
+ from ..tracker import SpendTracker
21
+ from ..exceptions import BudgetExceededError
22
+ from ._messages import flatten_messages
23
+
24
+ DEFAULT_MAX_OUTPUT_TOKENS_ESTIMATE = 1024  # fallback output-token count for the pre-call cost estimate when the caller supplies no output cap
25
+
26
+
27
class _GatedChatCompletions:
    """Budget-gated replacement for ``client.chat.completions``.

    Only ``create()`` is provided. Each call is priced up front, reserved
    against the tracker, forwarded to the real client, and finally settled
    with the cost computed from the usage reported in the response.
    """

    def __init__(
        self,
        real_completions: Any,
        tracker: SpendTracker,
        estimator: CostEstimator,
        calculator: CostCalculator,
        override_context: OverrideContext,
        workspace: str,
        event_log: Optional[EventLog],
    ) -> None:
        """Store the collaborators used by ``create()``.

        Args:
            real_completions: object whose ``create()`` performs the real call.
            tracker: provides ``check_and_reserve``, ``rollback`` and ``commit``.
            estimator: provides ``estimate_usd`` for the pre-call estimate.
            calculator: provides ``actual_cost_usd`` for the post-call cost.
            override_context: ``current()`` is read once per call and passed
                to the tracker as ``override``.
            workspace: label attached to every logged event.
            event_log: optional sink; nothing is logged when it is ``None``.
        """
        self._real = real_completions
        self._tracker = tracker
        self._estimator = estimator
        self._calculator = calculator
        self._override_context = override_context
        self._workspace = workspace
        self._event_log = event_log
        self._provider = OpenAIProvider()

    def _log(
        self,
        model: str,
        decision: str,
        estimated_cost: float,
        actual_cost: Optional[float] = None,
        reservation_id: Any = None,
        latency_ms: Optional[float] = None,
    ) -> None:
        """Record one gating decision in the event log, if one is configured."""
        if self._event_log is not None:
            self._event_log.record(
                workspace=self._workspace,
                provider="openai",
                model=model,
                decision=decision,
                estimated_cost_usd=estimated_cost,
                actual_cost_usd=actual_cost,
                reservation_id=reservation_id,
                latency_ms=latency_ms,
            )

    @staticmethod
    def _estimated_output_tokens(max_tokens: Optional[int], kwargs: dict) -> int:
        """Return the worst-case output-token count to price before the call.

        ``max_tokens`` wins when given. Otherwise ``max_completion_tokens``
        (the newer name for the same cap in the Chat Completions API) is
        honoured, and only then the module default. The cap is multiplied by
        ``n`` because every generated choice is billed.
        """
        cap = max_tokens
        if cap is None:
            newer_cap = kwargs.get("max_completion_tokens")
            # Accept real integers only: None or an SDK "not given" sentinel
            # must fall through to the default (bool is excluded explicitly
            # because it is an int subclass).
            if isinstance(newer_cap, int) and not isinstance(newer_cap, bool):
                cap = newer_cap
        if cap is None:
            cap = DEFAULT_MAX_OUTPUT_TOKENS_ESTIMATE

        choices = kwargs.get("n")
        if isinstance(choices, int) and not isinstance(choices, bool) and choices > 1:
            cap *= choices
        return cap

    def _settle(
        self,
        model: str,
        response: Any,
        reservation_id: Any,
        estimated_cost: float,
        decision: str,
        latency_ms: float,
    ) -> None:
        """Commit the reservation of a completed call and log the outcome.

        If usage extraction or pricing fails, the reservation is committed at
        the pre-call estimate (and logged with no actual cost) before the
        error is re-raised, so the reservation is never left dangling.
        """
        try:
            usage = self._provider.extract_usage(response)
            actual_cost = self._calculator.actual_cost_usd("openai", model, usage)
        except Exception:
            # The request has already completed, so releasing the reservation
            # would under-count spend; the estimate is the best figure left.
            self._tracker.commit(reservation_id, estimated_cost)
            self._log(model, decision, estimated_cost, None, reservation_id, latency_ms)
            raise
        self._tracker.commit(reservation_id, actual_cost)
        self._log(model, decision, estimated_cost, actual_cost, reservation_id, latency_ms)

    def create(
        self, *, model: str, messages: List[dict], max_tokens: Optional[int] = None, **kwargs: Any
    ) -> Any:
        """Gate, perform and account for one chat-completion request.

        Args:
            model: model name, used for pricing and forwarded to the client.
            messages: chat messages; flattened to text for the estimate and
                forwarded unchanged.
            max_tokens: output cap; forwarded only when not ``None``.
            **kwargs: forwarded to the real ``create()`` as-is.

        Returns:
            Whatever the real client's ``create()`` returned.

        Raises:
            NotImplementedError: if a truthy ``stream`` is passed.
            BudgetExceededError: re-raised from the tracker when it refuses
                the reservation (logged as ``"blocked"`` first).
            BaseException: anything raised by the real call is re-raised
                after the reservation has been rolled back.
        """
        if kwargs.get("stream"):
            raise NotImplementedError(
                "spendguard does not support stream=True yet -- usage can't be "
                "reliably extracted from a streamed response in this version."
            )

        prompt_text = flatten_messages(messages)
        estimated_cost = self._estimator.estimate_usd(
            "openai", model, prompt_text, self._estimated_output_tokens(max_tokens, kwargs)
        )

        override = self._override_context.current()
        try:
            reservation_id = self._tracker.check_and_reserve(estimated_cost, override=override)
        except BudgetExceededError:
            self._log(model, "blocked", estimated_cost)
            raise

        call_kwargs = dict(kwargs)
        if max_tokens is not None:
            call_kwargs["max_tokens"] = max_tokens

        started_at = time.monotonic()
        try:
            response = self._real.create(model=model, messages=messages, **call_kwargs)
        except BaseException:
            # BaseException rather than Exception: an interrupted request
            # (e.g. KeyboardInterrupt) must release its reservation too. The
            # exception is always re-raised.
            latency_ms = (time.monotonic() - started_at) * 1000
            self._tracker.rollback(reservation_id)
            self._log(model, "rolled_back", estimated_cost, reservation_id=reservation_id, latency_ms=latency_ms)
            raise
        latency_ms = (time.monotonic() - started_at) * 1000

        decision = "allowed_with_override" if override else "allowed"
        self._settle(model, response, reservation_id, estimated_cost, decision, latency_ms)
        return response
110
+
111
+
112
class _GatedChat:
    """Stand-in for ``client.chat`` that exposes a gated ``completions``."""

    def __init__(
        self,
        real_chat: Any,
        tracker: SpendTracker,
        estimator: CostEstimator,
        calculator: CostCalculator,
        override_context: OverrideContext,
        workspace: str,
        event_log: Optional[EventLog],
    ) -> None:
        """Build the gated wrapper around ``real_chat.completions``.

        Every other argument is passed through to ``_GatedChatCompletions``
        in the order received.
        """
        gated_completions = _GatedChatCompletions(
            real_chat.completions,
            tracker,
            estimator,
            calculator,
            override_context,
            workspace,
            event_log,
        )
        self.completions = gated_completions
126
+
127
+
128
class OpenAIClientWrapper:
    """Proxy around a sync ``openai.OpenAI()`` client.

    ``chat`` is replaced by a ``_GatedChat`` object built from
    ``client.chat``; every other attribute lookup is forwarded to the wrapped
    client unchanged. Async clients are not wrapped yet.
    """

    def __init__(
        self,
        client: Any,
        tracker: SpendTracker,
        estimator: CostEstimator,
        calculator: CostCalculator,
        override_context: OverrideContext,
        workspace: str = "default",
        event_log: Optional[EventLog] = None,
    ) -> None:
        """Keep a reference to ``client`` and install the gated ``chat``.

        All arguments after ``client`` are handed to ``_GatedChat`` untouched,
        together with ``client.chat``.
        """
        self._client = client
        self.chat = _GatedChat(
            client.chat, tracker, estimator, calculator, override_context, workspace, event_log
        )

    def __getattr__(self, name: str) -> Any:
        """Forward any attribute not found on the wrapper to the real client.

        Raises:
            AttributeError: if the wrapped client lacks ``name``, or if the
                wrapper has no ``_client`` yet.
        """
        # __getattr__ only runs after normal lookup fails. Reading
        # ``self._client`` here would re-enter __getattr__ whenever ``_client``
        # is not set yet (copy.copy / pickle probe a freshly created instance
        # before restoring its state) and recurse until RecursionError. Going
        # through the instance __dict__ turns that case into a plain
        # AttributeError, which hasattr()/copy/pickle handle correctly.
        try:
            client = self.__dict__["_client"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            ) from None
        return getattr(client, name)
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: spendguard
3
+ Version: 0.1.0
4
+ Summary: A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call before it happens.
5
+ Project-URL: Homepage, https://github.com/Rahul-git23/spendguard
6
+ Project-URL: Repository, https://github.com/Rahul-git23/spendguard
7
+ Author-email: Rahul Vichare <rahulvichare@gmail.com>
8
+ License: MIT
9
+ Keywords: ai,anthropic,budget,cost,guardrail,llm,openai
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Requires-Python: >=3.9
20
+ Requires-Dist: anthropic>=0.25.0
21
+ Requires-Dist: openai>=1.0.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
24
+ Provides-Extra: tiktoken
25
+ Requires-Dist: tiktoken>=0.5.0; extra == 'tiktoken'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # SpendGuard
29
+
30
+ A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call **before it happens** — no surprises at the end of the month.
31
+
32
+ ```python
33
+ from spendguard import SpendGuard
34
+
35
+ guard = SpendGuard(workspace="my-app", ceiling_usd=20.0)
36
+ client = guard.wrap_openai(OpenAI()) # or wrap_anthropic(Anthropic())
37
+
38
+ # Call the client exactly as normal — SpendGuard intercepts transparently.
39
+ # If the estimated cost would push cumulative spend past 25% of the $20 ceiling,
40
+ # it raises BudgetExceededError before the API call is made.
41
+ response = client.chat.completions.create(model="gpt-4o", messages=[...])
42
+ ```
43
+
44
+ ## Install
45
+
46
+ ```bash
47
+ pip install spendguard
48
+ ```
49
+
50
+ For more accurate pre-call token counting on OpenAI models:
51
+
52
+ ```bash
53
+ pip install spendguard[tiktoken]
54
+ ```
55
+
56
+ ## How it works
57
+
58
+ SpendGuard wraps your existing client object. Every call goes through two steps:
59
+
60
+ 1. **Pre-call estimate** — approximates the input token count and adds the max output tokens × the model's per-token rate. If `cumulative_spend + estimate > ceiling × threshold_pct`, it raises `BudgetExceededError` before the network call.
61
+ 2. **Post-call commit** — reads the provider's actual usage numbers from the response and records the real cost.
62
+
63
+ The default threshold is 25% of the ceiling (`threshold_pct=0.25`). Because the check in step 1 is made against cumulative spend, calls are blocked once total spend plus the next call's estimate would exceed 25% of the ceiling — it is meant as an early guardrail, not a hard cap at 100%.
64
+
65
+ ## Supported providers and models
66
+
67
+ | Provider | Client wrapper | Models gated by default |
68
+ | ---------- | -------------------- | ----------------------- |
69
+ | OpenAI | `wrap_openai()` | gpt-4o, gpt-4o-mini, and all models in the pricing config |
70
+ | Anthropic | `wrap_anthropic()` | claude-3-5-sonnet, claude-3-opus, haiku, and all models in the pricing config |
71
+
72
+ ## Usage
73
+
74
+ ### Basic setup
75
+
76
+ ```python
77
+ from openai import OpenAI
78
+ from spendguard import BudgetExceededError, SpendGuard
79
+
80
+ guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
81
+ client = guard.wrap_openai(OpenAI())
82
+
83
+ try:
84
+ response = client.chat.completions.create(
85
+ model="gpt-4o",
86
+ messages=[{"role": "user", "content": "Hello"}],
87
+ max_tokens=512,
88
+ )
89
+ except BudgetExceededError as e:
90
+ print(f"Blocked: {e}")
91
+ ```
92
+
93
+ ### Anthropic
94
+
95
+ ```python
96
+ from anthropic import Anthropic
97
+ from spendguard import SpendGuard
98
+
99
+ guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
100
+ client = guard.wrap_anthropic(Anthropic())
101
+
102
+ response = client.messages.create(
103
+ model="claude-sonnet-4-6",
104
+ max_tokens=1024,
105
+ messages=[{"role": "user", "content": "Hello"}],
106
+ )
107
+ ```
108
+
109
+ ### Overriding a block on purpose
110
+
111
+ When you explicitly want to allow a call that would be blocked (e.g., a one-time large batch job), use `track()` with `override=True`:
112
+
113
+ ```python
114
+ with guard.track(override=True):
115
+ response = client.chat.completions.create(...) # never blocked
116
+ ```
117
+
118
+ The override only applies inside the `with` block and does not persist.
119
+
120
+ ### Inspecting current spend
121
+
122
+ ```python
123
+ summary = guard.get_summary()
124
+ # {"ceiling_usd": 20.0, "spent_usd": 1.23, "reserved_usd": 0.0, "threshold_pct": 0.25}
125
+ ```
126
+
127
+ ## Workspace isolation
128
+
129
+ Each `SpendGuard` instance is scoped to a `workspace` string. When you run multiple products or feature flags, give each its own workspace so their budgets are tracked independently.
130
+
131
+ ## Out of scope for v0.1
132
+
133
+ - Streaming calls (`stream=True`) — explicitly rejected with a clear error.
134
+ - Embeddings, images, audio, and other non-chat/messages endpoints.
135
+ - Persistent spend across process restarts (resets on `SpendGuard()` construction).
136
+
137
+ Persistence and streaming support are planned for v1.0.
138
+
139
+ ## Feedback
140
+
141
+ Found a bug or have a feature request? [Open an issue](https://github.com/Rahul-git23/spendguard/issues) — all feedback welcome.
142
+
143
+ ## License
144
+
145
+ MIT
@@ -0,0 +1,24 @@
1
+ spendguard/__init__.py,sha256=nKIgYMx1-RYicoJ1PS4ZvWUNxI4t4t-7XJyPOB5_p4U,1279
2
+ spendguard/context.py,sha256=nbxnQ-OX5_LtU_bCjcgedudo259F840K1GPuxL9EC_E,1402
3
+ spendguard/events.py,sha256=cEl-GvgTRM8Y2gLH6tYQi7rCCZztFzhEPLxCJqzfjkc,3879
4
+ spendguard/exceptions.py,sha256=uWxxaeiYMp4rTwtNDr65YX6MFsXpd5q1PfNPVt9tRC4,913
5
+ spendguard/session.py,sha256=1yPoCq52k0znx80xH_LMlGhVs7TheIBx5b0l_5JMgK4,3136
6
+ spendguard/tracker.py,sha256=xwo0e03nyP6roFGo_AIXQhkXfUy_UdFKwrX2Upj4ci0,5584
7
+ spendguard/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ spendguard/config/pricing_anthropic.json,sha256=8O3CK6mjcw60c3FNYHCyAkWotojCnQ6-KIB6vn87pzM,426
9
+ spendguard/config/pricing_openai.json,sha256=9w917MLpp5BA6I_zd_XRFfNqjqOywYft5Rg2T-OBPJA,403
10
+ spendguard/cost/__init__.py,sha256=prdECLVbHNov2aojAxgwNS3AyQ0rG8FpGvmd9FLBMMA,199
11
+ spendguard/cost/calculator.py,sha256=ab3_aGah5PngdF3nTdZGAAEhjv2_xBVp-Ptrt4_Hgw8,757
12
+ spendguard/cost/estimator.py,sha256=joBdbAeeleZeZH8pb9iYT57uMtDWBr0pYOiwKk1gheg,1471
13
+ spendguard/cost/pricing.py,sha256=lwOlTmG06kc99HW2MKc6XK3Rt-OCoqPTMp9C7fCPg0I,2925
14
+ spendguard/providers/__init__.py,sha256=3GRtSd5P1iTHoVKxSP9UAb8t20fpTcv7_qk6GY2KR8Y,200
15
+ spendguard/providers/anthropic_provider.py,sha256=cq9pP0_dPP_crQ4dTSpz21VkB2QD6V1HA9-C1kxyM_4,328
16
+ spendguard/providers/base.py,sha256=-T6N1qBsInQvJl6tPWHz2bgmY5dCoodzJIcpnPYGiLY,1005
17
+ spendguard/providers/openai_provider.py,sha256=TJnEKmhvKi5N9doLmgUzv2t2EuTm6pcEZN9sLerB38c,327
18
+ spendguard/wrappers/__init__.py,sha256=qaLGpTJERDspdbMQqzIWMF_HB08TEgubsL-hC-_qDyk,147
19
+ spendguard/wrappers/_messages.py,sha256=BrpD4vG36J3LEfoh6KuzsRaew8zvJ0g1Ops7Z7Cufv0,535
20
+ spendguard/wrappers/anthropic.py,sha256=kGlxT6fkDivd9RaIziMxK7fZE8Bdn0hLQqMxvfQ94Hs,4328
21
+ spendguard/wrappers/openai.py,sha256=pDLibdu49hIdvoKadU-8EfhpuhaIJ1WhA9sOrpDj8ws,5232
22
+ spendguard-0.1.0.dist-info/METADATA,sha256=vA29iaaCMFdJqq8MGIxwS0wUR1XEdFHyfeZ5WmczXyg,4997
23
+ spendguard-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
24
+ spendguard-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any