spendguard 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. spendguard-0.1.0/.github/workflows/publish-spendguard.yml +96 -0
  2. spendguard-0.1.0/.gitignore +10 -0
  3. spendguard-0.1.0/PKG-INFO +145 -0
  4. spendguard-0.1.0/README.md +118 -0
  5. spendguard-0.1.0/pyproject.toml +45 -0
  6. spendguard-0.1.0/src/spendguard/__init__.py +36 -0
  7. spendguard-0.1.0/src/spendguard/config/__init__.py +0 -0
  8. spendguard-0.1.0/src/spendguard/config/pricing_anthropic.json +7 -0
  9. spendguard-0.1.0/src/spendguard/config/pricing_openai.json +7 -0
  10. spendguard-0.1.0/src/spendguard/context.py +46 -0
  11. spendguard-0.1.0/src/spendguard/cost/__init__.py +5 -0
  12. spendguard-0.1.0/src/spendguard/cost/calculator.py +21 -0
  13. spendguard-0.1.0/src/spendguard/cost/estimator.py +45 -0
  14. spendguard-0.1.0/src/spendguard/cost/pricing.py +77 -0
  15. spendguard-0.1.0/src/spendguard/events.py +112 -0
  16. spendguard-0.1.0/src/spendguard/exceptions.py +31 -0
  17. spendguard-0.1.0/src/spendguard/providers/__init__.py +5 -0
  18. spendguard-0.1.0/src/spendguard/providers/anthropic_provider.py +13 -0
  19. spendguard-0.1.0/src/spendguard/providers/base.py +31 -0
  20. spendguard-0.1.0/src/spendguard/providers/openai_provider.py +13 -0
  21. spendguard-0.1.0/src/spendguard/session.py +85 -0
  22. spendguard-0.1.0/src/spendguard/tracker.py +131 -0
  23. spendguard-0.1.0/src/spendguard/wrappers/__init__.py +4 -0
  24. spendguard-0.1.0/src/spendguard/wrappers/_messages.py +15 -0
  25. spendguard-0.1.0/src/spendguard/wrappers/anthropic.py +118 -0
  26. spendguard-0.1.0/src/spendguard/wrappers/openai.py +147 -0
  27. spendguard-0.1.0/tests/__init__.py +0 -0
  28. spendguard-0.1.0/tests/fakes.py +53 -0
  29. spendguard-0.1.0/tests/test_calculator.py +27 -0
  30. spendguard-0.1.0/tests/test_estimator.py +44 -0
  31. spendguard-0.1.0/tests/test_events.py +192 -0
  32. spendguard-0.1.0/tests/test_pricing.py +85 -0
  33. spendguard-0.1.0/tests/test_providers.py +25 -0
  34. spendguard-0.1.0/tests/test_real_sdk_shapes.py +65 -0
  35. spendguard-0.1.0/tests/test_session.py +145 -0
  36. spendguard-0.1.0/tests/test_tracker.py +94 -0
@@ -0,0 +1,96 @@
1
name: Publish SpendGuard to PyPI

# Trigger: create a GitHub Release with a tag of the form spendguard-v<version>
#   e.g. tag = "spendguard-v0.1.0", release title = "SpendGuard 0.1.0"
#
# PyPI Trusted Publishing setup (one-time, on pypi.org):
#   1. pypi.org -> Account -> Publishing -> "Add pending publisher"
#   2. Fill in:
#        GitHub owner:      Rahul-git23
#        Repository name:   spendguard
#        Workflow filename: publish-spendguard.yml
#        Environment name:  pypi
#   3. Save. No token or secret needed.
#
# GitHub Environment setup (one-time, in this repo):
#   Settings -> Environments -> New environment -> name it "pypi"
#   Optional: add yourself as Required reviewer for manual approval.

on:
  release:
    types: [published]

# Least privilege by default; the publish job opts in to id-token below.
permissions:
  contents: read

jobs:
  test:
    name: Test (Python ${{ matrix.python-version }})
    if: startsWith(github.ref_name, 'spendguard-v')
    runs-on: ubuntu-latest
    strategy:
      fail-fast: true
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install package + dev deps
        run: pip install -e ".[dev]"
      - name: Run tests
        run: python -m pytest --tb=short -q

  build:
    name: Build distribution
    if: startsWith(github.ref_name, 'spendguard-v')
    needs: test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Verify tag matches pyproject.toml version
        env:
          # The tag name is passed through the environment instead of being
          # interpolated into the script text, so a crafted tag name cannot
          # inject code into this step.
          RELEASE_TAG: ${{ github.ref_name }}
        run: |
          python - <<'PY'
          import os
          import re
          import sys

          tag = os.environ['RELEASE_TAG']
          with open('pyproject.toml') as f:
              content = f.read()
          m = re.search(r'^\s*version\s*=\s*"([^"]+)"', content, re.MULTILINE)
          if m is None:
              print('ERROR: no version field found in pyproject.toml')
              sys.exit(1)
          version = m.group(1)
          expected = 'spendguard-v' + version
          if tag != expected:
              print('ERROR: tag', tag, 'does not match version', version)
              sys.exit(1)
          print('OK: tag', tag, 'matches version', version)
          PY
      - name: Install build
        run: pip install build twine
      - name: Build wheel and sdist
        run: python -m build
      - name: Check dist
        run: python -m twine check dist/*
      - name: Upload dist artifact
        uses: actions/upload-artifact@v4
        with:
          name: spendguard-dist
          path: dist/
          if-no-files-found: error

  publish:
    name: Publish to PyPI
    if: startsWith(github.ref_name, 'spendguard-v')
    needs: build
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Download dist artifact
        uses: actions/download-artifact@v4
        with:
          name: spendguard-dist
          path: dist/
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,10 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .venv/
7
+ .env
8
+ .pypirc
9
+ *.pyc
10
+ .DS_Store
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: spendguard
3
+ Version: 0.1.0
4
+ Summary: A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call before it happens.
5
+ Project-URL: Homepage, https://github.com/Rahul-git23/spendguard
6
+ Project-URL: Repository, https://github.com/Rahul-git23/spendguard
7
+ Author-email: Rahul Vichare <rahulvichare@gmail.com>
8
+ License: MIT
9
+ Keywords: ai,anthropic,budget,cost,guardrail,llm,openai
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Requires-Python: >=3.9
20
+ Requires-Dist: anthropic>=0.25.0
21
+ Requires-Dist: openai>=1.0.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
24
+ Provides-Extra: tiktoken
25
+ Requires-Dist: tiktoken>=0.5.0; extra == 'tiktoken'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # SpendGuard
29
+
30
+ A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call **before it happens** — no surprises at the end of the month.
31
+
32
+ ```python
33
+ from spendguard import SpendGuard
34
+
35
+ guard = SpendGuard(workspace="my-app", ceiling_usd=20.0)
36
+ client = guard.wrap_openai(OpenAI()) # or wrap_anthropic(Anthropic())
37
+
38
+ # Call the client exactly as normal — SpendGuard intercepts transparently.
39
+ # If the estimated cost would push cumulative spend past 25% of the $20 ceiling,
40
+ # it raises BudgetExceededError before the API call is made.
41
+ response = client.chat.completions.create(model="gpt-4o", messages=[...])
42
+ ```
43
+
44
+ ## Install
45
+
46
+ ```bash
47
+ pip install spendguard
48
+ ```
49
+
50
+ For more accurate pre-call token counting on OpenAI models:
51
+
52
+ ```bash
53
+ pip install spendguard[tiktoken]
54
+ ```
55
+
56
+ ## How it works
57
+
58
+ SpendGuard wraps your existing client object. Every call goes through two steps:
59
+
60
+ 1. **Pre-call estimate** — approximates the input token count and adds the max output tokens × the model's per-token rate. If `cumulative_spend + estimate > ceiling × threshold_pct`, it raises `BudgetExceededError` before the network call.
61
+ 2. **Post-call commit** — reads the provider's actual usage numbers from the response and records the real cost.
62
+
63
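+ The default threshold is 25% of the ceiling (`threshold_pct=0.25`). Because the check is applied to cumulative spend, calls are blocked once spend so far plus the next call's estimate would exceed 25% of the ceiling ($5 of a $20 ceiling); no single call can consume more than that either. It is an early guardrail against runaway spend, not a hard cap at 100% of the ceiling.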
64
+
65
+ ## Supported providers and models
66
+
67
+ | Provider | Client wrapper | Models gated by default |
68
+ | ---------- | -------------------- | ----------------------- |
69
+ | OpenAI | `wrap_openai()` | gpt-4o, gpt-4o-mini, and all models in the pricing config |
70
+ | Anthropic | `wrap_anthropic()` | claude-haiku-4-5, claude-sonnet-4-6, claude-opus-4-6, and all models in the pricing config |
71
+
72
+ ## Usage
73
+
74
+ ### Basic setup
75
+
76
+ ```python
77
+ from openai import OpenAI
78
+ from spendguard import BudgetExceededError, SpendGuard
79
+
80
+ guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
81
+ client = guard.wrap_openai(OpenAI())
82
+
83
+ try:
84
+ response = client.chat.completions.create(
85
+ model="gpt-4o",
86
+ messages=[{"role": "user", "content": "Hello"}],
87
+ max_tokens=512,
88
+ )
89
+ except BudgetExceededError as e:
90
+ print(f"Blocked: {e}")
91
+ ```
92
+
93
+ ### Anthropic
94
+
95
+ ```python
96
+ from anthropic import Anthropic
97
+ from spendguard import SpendGuard
98
+
99
+ guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
100
+ client = guard.wrap_anthropic(Anthropic())
101
+
102
+ response = client.messages.create(
103
+ model="claude-sonnet-4-6",
104
+ max_tokens=1024,
105
+ messages=[{"role": "user", "content": "Hello"}],
106
+ )
107
+ ```
108
+
109
+ ### Overriding a block on purpose
110
+
111
+ When you explicitly want to allow a call that would be blocked (e.g., a one-time large batch job), use `track()` with `override=True`:
112
+
113
+ ```python
114
+ with guard.track(override=True):
115
+ response = client.chat.completions.create(...) # never blocked
116
+ ```
117
+
118
+ The override only applies inside the `with` block and does not persist.
119
+
120
+ ### Inspecting current spend
121
+
122
+ ```python
123
+ summary = guard.get_summary()
124
+ # {"ceiling_usd": 20.0, "spent_usd": 1.23, "reserved_usd": 0.0, "threshold_pct": 0.25}
125
+ ```
126
+
127
+ ## Workspace isolation
128
+
129
+ Each `SpendGuard` instance is scoped to a `workspace` string. When you run multiple products or feature flags, give each its own workspace so their budgets are tracked independently.
130
+
131
+ ## Out of scope for v0.1
132
+
133
+ - Streaming calls (`stream=True`) — explicitly rejected with a clear error.
134
+ - Embeddings, images, audio, and other non-chat/messages endpoints.
135
+ - Persistent spend across process restarts (resets on `SpendGuard()` construction).
136
+
137
+ Persistence and streaming support are planned for v1.0.
138
+
139
+ ## Feedback
140
+
141
+ Found a bug or have a feature request? [Open an issue](https://github.com/Rahul-git23/spendguard/issues) — all feedback welcome.
142
+
143
+ ## License
144
+
145
+ MIT
@@ -0,0 +1,118 @@
1
+ # SpendGuard
2
+
3
+ A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call **before it happens** — no surprises at the end of the month.
4
+
5
+ ```python
6
+ from spendguard import SpendGuard
7
+
8
+ guard = SpendGuard(workspace="my-app", ceiling_usd=20.0)
9
+ client = guard.wrap_openai(OpenAI()) # or wrap_anthropic(Anthropic())
10
+
11
+ # Call the client exactly as normal — SpendGuard intercepts transparently.
12
+ # If the estimated cost would push cumulative spend past 25% of the $20 ceiling,
13
+ # it raises BudgetExceededError before the API call is made.
14
+ response = client.chat.completions.create(model="gpt-4o", messages=[...])
15
+ ```
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ pip install spendguard
21
+ ```
22
+
23
+ For more accurate pre-call token counting on OpenAI models:
24
+
25
+ ```bash
26
+ pip install spendguard[tiktoken]
27
+ ```
28
+
29
+ ## How it works
30
+
31
+ SpendGuard wraps your existing client object. Every call goes through two steps:
32
+
33
+ 1. **Pre-call estimate** — approximates the input token count and adds the max output tokens × the model's per-token rate. If `cumulative_spend + estimate > ceiling × threshold_pct`, it raises `BudgetExceededError` before the network call.
34
+ 2. **Post-call commit** — reads the provider's actual usage numbers from the response and records the real cost.
35
+
36
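+ The default threshold is 25% of the ceiling (`threshold_pct=0.25`). Because the check is applied to cumulative spend, calls are blocked once spend so far plus the next call's estimate would exceed 25% of the ceiling ($5 of a $20 ceiling); no single call can consume more than that either. It is an early guardrail against runaway spend, not a hard cap at 100% of the ceiling.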
37
+
38
+ ## Supported providers and models
39
+
40
+ | Provider | Client wrapper | Models gated by default |
41
+ | ---------- | -------------------- | ----------------------- |
42
+ | OpenAI | `wrap_openai()` | gpt-4o, gpt-4o-mini, and all models in the pricing config |
43
+ | Anthropic | `wrap_anthropic()` | claude-haiku-4-5, claude-sonnet-4-6, claude-opus-4-6, and all models in the pricing config |
44
+
45
+ ## Usage
46
+
47
+ ### Basic setup
48
+
49
+ ```python
50
+ from openai import OpenAI
51
+ from spendguard import BudgetExceededError, SpendGuard
52
+
53
+ guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
54
+ client = guard.wrap_openai(OpenAI())
55
+
56
+ try:
57
+ response = client.chat.completions.create(
58
+ model="gpt-4o",
59
+ messages=[{"role": "user", "content": "Hello"}],
60
+ max_tokens=512,
61
+ )
62
+ except BudgetExceededError as e:
63
+ print(f"Blocked: {e}")
64
+ ```
65
+
66
+ ### Anthropic
67
+
68
+ ```python
69
+ from anthropic import Anthropic
70
+ from spendguard import SpendGuard
71
+
72
+ guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
73
+ client = guard.wrap_anthropic(Anthropic())
74
+
75
+ response = client.messages.create(
76
+ model="claude-sonnet-4-6",
77
+ max_tokens=1024,
78
+ messages=[{"role": "user", "content": "Hello"}],
79
+ )
80
+ ```
81
+
82
+ ### Overriding a block on purpose
83
+
84
+ When you explicitly want to allow a call that would be blocked (e.g., a one-time large batch job), use `track()` with `override=True`:
85
+
86
+ ```python
87
+ with guard.track(override=True):
88
+ response = client.chat.completions.create(...) # never blocked
89
+ ```
90
+
91
+ The override only applies inside the `with` block and does not persist.
92
+
93
+ ### Inspecting current spend
94
+
95
+ ```python
96
+ summary = guard.get_summary()
97
+ # {"ceiling_usd": 20.0, "spent_usd": 1.23, "reserved_usd": 0.0, "threshold_pct": 0.25}
98
+ ```
99
+
100
+ ## Workspace isolation
101
+
102
+ Each `SpendGuard` instance is scoped to a `workspace` string. When you run multiple products or feature flags, give each its own workspace so their budgets are tracked independently.
103
+
104
+ ## Out of scope for v0.1
105
+
106
+ - Streaming calls (`stream=True`) — explicitly rejected with a clear error.
107
+ - Embeddings, images, audio, and other non-chat/messages endpoints.
108
+ - Persistent spend across process restarts (resets on `SpendGuard()` construction).
109
+
110
+ Persistence and streaming support are planned for v1.0.
111
+
112
+ ## Feedback
113
+
114
+ Found a bug or have a feature request? [Open an issue](https://github.com/Rahul-git23/spendguard/issues) — all feedback welcome.
115
+
116
+ ## License
117
+
118
+ MIT
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "spendguard"
7
+ version = "0.1.0"
8
+ description = "A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call before it happens."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "Rahul Vichare", email = "rahulvichare@gmail.com" },
14
+ ]
15
+ keywords = ["openai", "anthropic", "llm", "cost", "budget", "guardrail", "ai"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Topic :: Software Development :: Libraries :: Python Modules",
26
+ ]
27
+ dependencies = [
28
+ "openai>=1.0.0",
29
+ "anthropic>=0.25.0",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ dev = ["pytest>=7.0.0"]
34
+ tiktoken = ["tiktoken>=0.5.0"]
35
+
36
+ [project.urls]
37
+ Homepage = "https://github.com/Rahul-git23/spendguard"
38
+ Repository = "https://github.com/Rahul-git23/spendguard"
39
+
40
+ [tool.hatch.build.targets.wheel]
41
+ packages = ["src/spendguard"]
42
+
43
+ [tool.pytest.ini_options]
44
+ testpaths = ["tests"]
45
+
@@ -0,0 +1,36 @@
1
+ """spendguard -- blocks an over-budget LLM API call before it happens.
2
+
3
+ Build status: Stage 4 (core platform build), matching README.md's quickstart.
4
+ SpendGuard.wrap_openai() / wrap_anthropic() / track() are implemented and
5
+ gate client.chat.completions.create() / client.messages.create() respectively
6
+ -- every other client attribute (embeddings, models, ...) and streaming calls
7
+ (stream=True) are explicitly out of scope for this MVP wrapper, not silently
8
+ mishandled. Pricing data in config/ is placeholder, not verified current
9
+ rates -- see cost/pricing.py.
10
+ """
11
+ from .exceptions import BudgetError, BudgetExceededError, PricingDataError
12
+ from .tracker import SpendTracker
13
+ from .cost import CostCalculator, CostEstimator, ModelPrice, PricingTable
14
+ from .providers import AnthropicProvider, OpenAIProvider, Provider, Usage
15
+ from .session import SpendGuard
16
+ from .wrappers import AnthropicClientWrapper, OpenAIClientWrapper
17
+
18
+ __version__ = "0.1.0"
19
+
20
+ __all__ = [
21
+ "SpendGuard",
22
+ "SpendTracker",
23
+ "BudgetError",
24
+ "BudgetExceededError",
25
+ "PricingDataError",
26
+ "CostCalculator",
27
+ "CostEstimator",
28
+ "ModelPrice",
29
+ "PricingTable",
30
+ "Provider",
31
+ "Usage",
32
+ "OpenAIProvider",
33
+ "AnthropicProvider",
34
+ "OpenAIClientWrapper",
35
+ "AnthropicClientWrapper",
36
+ ]
File without changes
@@ -0,0 +1,7 @@
1
+ {
2
+ "_note": "Pricing last verified 2026-06-25 against anthropic.com/pricing. Update _version_date and rates when Anthropic publishes a price change.",
3
+ "_version_date": "2026-06-25",
4
+ "claude-haiku-4-5": {"input_per_million": 1.00, "output_per_million": 5.00},
5
+ "claude-sonnet-4-6": {"input_per_million": 3.00, "output_per_million": 15.00},
6
+ "claude-opus-4-6": {"input_per_million": 15.00, "output_per_million": 75.00}
7
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "_note": "Pricing last verified 2026-06-25 against openai.com/api/pricing. Update _version_date and rates when OpenAI publishes a price change.",
3
+ "_version_date": "2026-06-25",
4
+ "gpt-4o-mini": {"input_per_million": 0.15, "output_per_million": 0.60},
5
+ "gpt-4o": {"input_per_million": 2.50, "output_per_million": 10.00},
6
+ "gpt-4.1-mini": {"input_per_million": 0.40, "output_per_million": 1.60}
7
+ }
@@ -0,0 +1,46 @@
1
+ """Thread-local override state shared between SpendGuard.track() and the
2
+ provider wrappers it gates.
3
+
4
+ `with guard.track(override=True):` has to affect only calls made on the
5
+ current thread inside that block, not every thread sharing the same
6
+ SpendGuard -- otherwise one thread's override would silently apply to
7
+ another's concurrent call.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import threading
12
+
13
+
14
class OverrideContext:
    """Per-thread stack of override flags.

    The stack lives on a ``threading.local``, so every thread that touches an
    instance sees its own independent stack: a flag pushed on one thread is
    never visible from another.
    """

    def __init__(self) -> None:
        self._local = threading.local()

    def push(self, override: bool) -> None:
        """Push *override* onto the calling thread's stack."""
        stack = getattr(self._local, "stack", None)
        if stack is None:
            # First use on this thread: thread-local attributes do not exist
            # until they are assigned on that thread.
            stack = []
            self._local.stack = stack
        stack.append(override)

    def pop(self) -> None:
        """Discard the calling thread's most recently pushed flag.

        Raises:
            IndexError: if this thread has no pushed flag left, including the
                case where ``push()`` was never called on this thread.
        """
        stack = getattr(self._local, "stack", None)
        if not stack:
            raise IndexError(
                "OverrideContext.pop() called without a matching push() on this thread"
            )
        stack.pop()

    def current(self) -> bool:
        """Return the innermost flag for the calling thread.

        Returns ``False`` when nothing has been pushed on this thread. The
        result is always a real ``bool``, even if a truthy or falsy non-bool
        value was pushed.
        """
        stack = getattr(self._local, "stack", None)
        if not stack:
            return False
        return bool(stack[-1])
31
+
32
+
33
class TrackContext:
    """Context manager returned by SpendGuard.track().

    Entering pushes the requested override flag onto the shared
    OverrideContext and leaving pops it again, so the flag applies only
    inside the ``with`` block -- see README.md's "Overriding a block on
    purpose".
    """

    def __init__(self, override_context: OverrideContext, override: bool) -> None:
        self._override_context, self._override = override_context, override

    def __enter__(self) -> "TrackContext":
        context = self._override_context
        context.push(self._override)
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:
        context = self._override_context
        context.pop()
        # False means "do not suppress": an exception raised inside the
        # with-body keeps propagating after the flag has been popped.
        return False
@@ -0,0 +1,5 @@
1
+ from .pricing import ModelPrice, PricingTable
2
+ from .estimator import CostEstimator
3
+ from .calculator import CostCalculator
4
+
5
+ __all__ = ["ModelPrice", "PricingTable", "CostEstimator", "CostCalculator"]
@@ -0,0 +1,21 @@
1
+ """CostCalculator -- turns real, post-call token usage into an actual dollar cost.
2
+
3
+ Always the source of truth recorded into SpendTracker.commit() -- never the
4
+ pre-call estimate, once the provider's real usage numbers are known.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from ..providers.base import Usage
9
+ from .pricing import PricingTable
10
+
11
+
12
class CostCalculator:
    """Prices real, post-call token usage with rates from a PricingTable."""

    def __init__(self, pricing: PricingTable) -> None:
        self._pricing = pricing

    def actual_cost_usd(self, provider: str, model: str, usage: Usage) -> float:
        """Return the dollar cost of *usage* for *model* on *provider*.

        The cost is input tokens at the model's per-million input rate plus
        output tokens at its per-million output rate. Whatever the pricing
        table raises for an unknown provider or model propagates unchanged.
        """
        rate = self._pricing.get_price(provider, model)
        input_cost = usage.input_tokens / 1_000_000 * rate.input_per_million
        output_cost = usage.output_tokens / 1_000_000 * rate.output_per_million
        return input_cost + output_cost
@@ -0,0 +1,45 @@
1
+ """CostEstimator -- pre-call cost estimate from a prompt and max output size.
2
+
3
+ Zero required dependency: input tokens are approximated at ~4 characters per
4
+ token unless tiktoken is installed (pip install spendguard[tiktoken]), in
5
+ which case OpenAI prompts get exact cl100k_base counts. The estimate only has
6
+ to be close enough to gate correctly -- CostCalculator always recomputes the
7
+ real cost from the provider's own usage numbers after the call resolves.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from .pricing import PricingTable
12
+
13
# Rough characters-per-token ratio used when no tokenizer is available.
CHARS_PER_TOKEN_APPROX = 4

# tiktoken is an optional extra: when it cannot be imported, _ENCODING stays
# None and every prompt falls back to the character-count approximation.
try:
    import tiktoken

    _ENCODING = tiktoken.get_encoding("cl100k_base")
except ImportError:
    _ENCODING = None


def _count_input_tokens(prompt_text: str, provider: str) -> int:
    """Return the estimated number of input tokens in *prompt_text*.

    For the "openai" provider with tiktoken installed, the prompt is encoded
    with cl100k_base; otherwise the count is approximated as
    ``len(prompt_text) // CHARS_PER_TOKEN_APPROX``. The result is never less
    than 1, so even an empty prompt contributes to the estimate.
    """
    if _ENCODING is not None and provider == "openai":
        # disallowed_special=() makes tiktoken treat text such as
        # "<|endoftext|>" as ordinary characters. With the default setting
        # encode() raises ValueError on such input, which would let prompt
        # content crash the budget check instead of being counted.
        token_count = len(_ENCODING.encode(prompt_text, disallowed_special=()))
    else:
        token_count = len(prompt_text) // CHARS_PER_TOKEN_APPROX
    return max(1, token_count)
27
+
28
+
29
class CostEstimator:
    """Produces the pre-call cost estimate used to gate a request."""

    def __init__(self, pricing: PricingTable) -> None:
        self._pricing = pricing

    def estimate_usd(
        self,
        provider: str,
        model: str,
        prompt_text: str,
        max_output_tokens: int,
    ) -> float:
        """Return the estimated dollar cost of one call.

        The estimate prices the counted input tokens of *prompt_text* at the
        model's input rate and assumes the response uses all of
        *max_output_tokens* at the output rate. Whatever the pricing table
        raises for an unknown provider or model propagates unchanged.
        """
        rate = self._pricing.get_price(provider, model)
        prompt_tokens = _count_input_tokens(prompt_text, provider)
        input_cost = prompt_tokens / 1_000_000 * rate.input_per_million
        output_cost = max_output_tokens / 1_000_000 * rate.output_per_million
        return input_cost + output_cost
@@ -0,0 +1,77 @@
1
+ """PricingTable -- loads per-provider model pricing from config/pricing_<provider>.json.
2
+
3
+ Adding a new provider's prices later is a new config/pricing_<provider>.json
4
+ file, not a code change here. Keys starting with "_" (e.g. "_note") are
5
+ metadata, not models, and are skipped when loading.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ import warnings
12
+ from dataclasses import dataclass
13
+ from datetime import date, datetime
14
+ from pathlib import Path
15
+ from typing import Dict, Optional
16
+
17
+ from ..exceptions import PricingDataError
18
+
19
+ _STALENESS_DAYS = 90
20
+ _log = logging.getLogger(__name__)
21
+
22
+ DEFAULT_CONFIG_DIR = Path(__file__).resolve().parent.parent / "config"
23
+
24
+
25
@dataclass(frozen=True)
class ModelPrice:
    """Immutable pair of per-million-token rates for a single model."""

    # Rate applied to one million input tokens.
    input_per_million: float
    # Rate applied to one million output tokens.
    output_per_million: float
29
+
30
+
31
class PricingTable:
    """Lazy, cached loader for ``pricing_<provider>.json`` files.

    A provider's file is read the first time one of its prices is requested
    and the parsed result is cached on the instance. Top-level keys starting
    with "_" are treated as metadata and never become models.
    """

    def __init__(self, config_dir: Optional[Path] = None) -> None:
        # Path() also accepts a plain string, so callers may pass either.
        self._config_dir = Path(config_dir) if config_dir is not None else DEFAULT_CONFIG_DIR
        self._cache: Dict[str, Dict[str, ModelPrice]] = {}

    @staticmethod
    def _staleness_message(provider: str, version_date_str: object) -> Optional[str]:
        """Return the stale-pricing warning text, or None if no warning is due.

        A missing, empty or unparseable ``_version_date`` never warns; an
        unparseable one is only reported at debug level.
        """
        if not version_date_str:
            return None
        try:
            version_date = datetime.strptime(version_date_str, "%Y-%m-%d").date()
        except (TypeError, ValueError):
            # TypeError covers a non-string value such as a JSON number.
            _log.debug("Could not parse _version_date '%s' in pricing_%s.json", version_date_str, provider)
            return None
        age_days = (date.today() - version_date).days
        if age_days <= _STALENESS_DAYS:
            return None
        return (
            f"SpendGuard: {provider} pricing data is {age_days} days old "
            f"(last verified {version_date_str}). Cost estimates may be inaccurate "
            f"if {provider} has changed their prices. Update config/pricing_{provider}.json "
            f"or pass a custom config_dir to PricingTable()."
        )

    def _load_provider(self, provider: str) -> Dict[str, ModelPrice]:
        """Return the model -> ModelPrice mapping for *provider*, loading it once.

        Raises:
            PricingDataError: if the provider has no config file, the file is
                not valid JSON, or an entry lacks the expected rate fields.
        """
        cached = self._cache.get(provider)
        if cached is not None:
            return cached

        path = self._config_dir / f"pricing_{provider}.json"
        if not path.exists():
            raise PricingDataError(f"no pricing config for provider '{provider}' (looked for {path})")

        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
        except ValueError as err:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError
            # subclasses; report either as a pricing-data problem.
            raise PricingDataError(f"pricing config {path} is not valid JSON: {err}") from err
        if not isinstance(raw, dict):
            raise PricingDataError(f"pricing config {path} must contain a JSON object at the top level")

        message = self._staleness_message(provider, raw.get("_version_date"))
        if message is not None:
            # Warn from this frame so stacklevel=3 keeps pointing at the
            # caller of get_price().
            warnings.warn(message, stacklevel=3)

        prices: Dict[str, ModelPrice] = {}
        for model, entry in raw.items():
            if model.startswith("_"):
                continue
            try:
                prices[model] = ModelPrice(
                    input_per_million=entry["input_per_million"],
                    output_per_million=entry["output_per_million"],
                )
            except (KeyError, TypeError) as err:
                raise PricingDataError(
                    f"malformed pricing entry for model '{model}' in {path}: "
                    f"expected 'input_per_million' and 'output_per_million'"
                ) from err

        self._cache[provider] = prices
        return prices

    def get_price(self, provider: str, model: str) -> ModelPrice:
        """Return the ModelPrice for *model* on *provider*.

        Raises:
            PricingDataError: if the provider's config cannot be loaded or
                does not list *model*.
        """
        prices = self._load_provider(provider)
        if model not in prices:
            raise PricingDataError(f"unknown model '{model}' for provider '{provider}'")
        return prices[model]