spendguard 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spendguard-0.1.0/.github/workflows/publish-spendguard.yml +96 -0
- spendguard-0.1.0/.gitignore +10 -0
- spendguard-0.1.0/PKG-INFO +145 -0
- spendguard-0.1.0/README.md +118 -0
- spendguard-0.1.0/pyproject.toml +45 -0
- spendguard-0.1.0/src/spendguard/__init__.py +36 -0
- spendguard-0.1.0/src/spendguard/config/__init__.py +0 -0
- spendguard-0.1.0/src/spendguard/config/pricing_anthropic.json +7 -0
- spendguard-0.1.0/src/spendguard/config/pricing_openai.json +7 -0
- spendguard-0.1.0/src/spendguard/context.py +46 -0
- spendguard-0.1.0/src/spendguard/cost/__init__.py +5 -0
- spendguard-0.1.0/src/spendguard/cost/calculator.py +21 -0
- spendguard-0.1.0/src/spendguard/cost/estimator.py +45 -0
- spendguard-0.1.0/src/spendguard/cost/pricing.py +77 -0
- spendguard-0.1.0/src/spendguard/events.py +112 -0
- spendguard-0.1.0/src/spendguard/exceptions.py +31 -0
- spendguard-0.1.0/src/spendguard/providers/__init__.py +5 -0
- spendguard-0.1.0/src/spendguard/providers/anthropic_provider.py +13 -0
- spendguard-0.1.0/src/spendguard/providers/base.py +31 -0
- spendguard-0.1.0/src/spendguard/providers/openai_provider.py +13 -0
- spendguard-0.1.0/src/spendguard/session.py +85 -0
- spendguard-0.1.0/src/spendguard/tracker.py +131 -0
- spendguard-0.1.0/src/spendguard/wrappers/__init__.py +4 -0
- spendguard-0.1.0/src/spendguard/wrappers/_messages.py +15 -0
- spendguard-0.1.0/src/spendguard/wrappers/anthropic.py +118 -0
- spendguard-0.1.0/src/spendguard/wrappers/openai.py +147 -0
- spendguard-0.1.0/tests/__init__.py +0 -0
- spendguard-0.1.0/tests/fakes.py +53 -0
- spendguard-0.1.0/tests/test_calculator.py +27 -0
- spendguard-0.1.0/tests/test_estimator.py +44 -0
- spendguard-0.1.0/tests/test_events.py +192 -0
- spendguard-0.1.0/tests/test_pricing.py +85 -0
- spendguard-0.1.0/tests/test_providers.py +25 -0
- spendguard-0.1.0/tests/test_real_sdk_shapes.py +65 -0
- spendguard-0.1.0/tests/test_session.py +145 -0
- spendguard-0.1.0/tests/test_tracker.py +94 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
name: Publish SpendGuard to PyPI
|
|
2
|
+
|
|
3
|
+
# Trigger: create a GitHub Release with a tag of the form spendguard-v<version>
|
|
4
|
+
# e.g. tag = "spendguard-v0.1.0", release title = "SpendGuard 0.1.0"
|
|
5
|
+
#
|
|
6
|
+
# PyPI Trusted Publishing setup (one-time, on pypi.org):
|
|
7
|
+
# 1. pypi.org -> Account -> Publishing -> "Add pending publisher"
|
|
8
|
+
# 2. Fill in:
|
|
9
|
+
# GitHub owner: Rahul-git23
|
|
10
|
+
# Repository name: spendguard
|
|
11
|
+
# Workflow filename: publish-spendguard.yml
|
|
12
|
+
# Environment name: pypi
|
|
13
|
+
# 3. Save. No token or secret needed.
|
|
14
|
+
#
|
|
15
|
+
# GitHub Environment setup (one-time, in this repo):
|
|
16
|
+
# Settings -> Environments -> New environment -> name it "pypi"
|
|
17
|
+
# Optional: add yourself as Required reviewer for manual approval.
|
|
18
|
+
|
|
19
|
+
on:
|
|
20
|
+
release:
|
|
21
|
+
types: [published]
|
|
22
|
+
|
|
23
|
+
jobs:
|
|
24
|
+
test:
|
|
25
|
+
name: Test (Python ${{ matrix.python-version }})
|
|
26
|
+
if: startsWith(github.ref_name, 'spendguard-v')
|
|
27
|
+
runs-on: ubuntu-latest
|
|
28
|
+
strategy:
|
|
29
|
+
fail-fast: true
|
|
30
|
+
matrix:
|
|
31
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
|
32
|
+
steps:
|
|
33
|
+
- uses: actions/checkout@v4
|
|
34
|
+
- uses: actions/setup-python@v5
|
|
35
|
+
with:
|
|
36
|
+
python-version: ${{ matrix.python-version }}
|
|
37
|
+
- name: Install package + dev deps
|
|
38
|
+
run: pip install -e ".[dev]"
|
|
39
|
+
- name: Run tests
|
|
40
|
+
run: python -m pytest --tb=short -q
|
|
41
|
+
|
|
42
|
+
build:
|
|
43
|
+
name: Build distribution
|
|
44
|
+
if: startsWith(github.ref_name, 'spendguard-v')
|
|
45
|
+
needs: test
|
|
46
|
+
runs-on: ubuntu-latest
|
|
47
|
+
steps:
|
|
48
|
+
- uses: actions/checkout@v4
|
|
49
|
+
- uses: actions/setup-python@v5
|
|
50
|
+
with:
|
|
51
|
+
python-version: "3.12"
|
|
52
|
+
- name: Verify tag matches pyproject.toml version
  # The tag is handed to the script through an environment variable instead
  # of being interpolated into the script text, so a crafted tag name cannot
  # inject code into the step.
  env:
    RELEASE_TAG: ${{ github.ref_name }}
  run: |
    python - <<'PY'
    import os
    import sys
    import tomllib  # stdlib since Python 3.11; this job pins Python 3.12

    tag = os.environ["RELEASE_TAG"]
    # Parse the TOML properly rather than regex-matching the first
    # "version =" line, which fails with an opaque error when nothing matches.
    with open("pyproject.toml", "rb") as f:
        version = tomllib.load(f)["project"]["version"]
    expected = "spendguard-v" + version
    if tag != expected:
        print("ERROR: tag", tag, "does not match version", version)
        sys.exit(1)
    print("OK: tag", tag, "matches version", version)
    PY
|
|
67
|
+
- name: Install build
|
|
68
|
+
run: pip install build twine
|
|
69
|
+
- name: Build wheel and sdist
|
|
70
|
+
run: python -m build
|
|
71
|
+
- name: Check dist
|
|
72
|
+
run: python -m twine check dist/*
|
|
73
|
+
- name: Upload dist artifact
|
|
74
|
+
uses: actions/upload-artifact@v4
|
|
75
|
+
with:
|
|
76
|
+
name: spendguard-dist
|
|
77
|
+
path: dist/
|
|
78
|
+
if-no-files-found: error
|
|
79
|
+
|
|
80
|
+
publish:
|
|
81
|
+
name: Publish to PyPI
|
|
82
|
+
if: startsWith(github.ref_name, 'spendguard-v')
|
|
83
|
+
needs: build
|
|
84
|
+
runs-on: ubuntu-latest
|
|
85
|
+
environment: pypi
|
|
86
|
+
permissions:
|
|
87
|
+
id-token: write
|
|
88
|
+
contents: read
|
|
89
|
+
steps:
|
|
90
|
+
- name: Download dist artifact
|
|
91
|
+
uses: actions/download-artifact@v4
|
|
92
|
+
with:
|
|
93
|
+
name: spendguard-dist
|
|
94
|
+
path: dist/
|
|
95
|
+
- name: Publish to PyPI
|
|
96
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spendguard
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call before it happens.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Rahul-git23/spendguard
|
|
6
|
+
Project-URL: Repository, https://github.com/Rahul-git23/spendguard
|
|
7
|
+
Author-email: Rahul Vichare <rahulvichare@gmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: ai,anthropic,budget,cost,guardrail,llm,openai
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Requires-Dist: anthropic>=0.25.0
|
|
21
|
+
Requires-Dist: openai>=1.0.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
24
|
+
Provides-Extra: tiktoken
|
|
25
|
+
Requires-Dist: tiktoken>=0.5.0; extra == 'tiktoken'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# SpendGuard
|
|
29
|
+
|
|
30
|
+
A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call **before it happens** — no surprises at the end of the month.
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from spendguard import SpendGuard
|
|
34
|
+
|
|
35
|
+
guard = SpendGuard(workspace="my-app", ceiling_usd=20.0)
|
|
36
|
+
client = guard.wrap_openai(OpenAI()) # or wrap_anthropic(Anthropic())
|
|
37
|
+
|
|
38
|
+
# Call the client exactly as normal — SpendGuard intercepts transparently.
|
|
39
|
+
# If the estimated cost would push cumulative spend past 25% of the $20 ceiling,
|
|
40
|
+
# it raises BudgetExceededError before the API call is made.
|
|
41
|
+
response = client.chat.completions.create(model="gpt-4o", messages=[...])
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Install
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install spendguard
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
For more accurate pre-call token counting on OpenAI models:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install spendguard[tiktoken]
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## How it works
|
|
57
|
+
|
|
58
|
+
SpendGuard wraps your existing client object. Every call goes through two steps:
|
|
59
|
+
|
|
60
|
+
1. **Pre-call estimate** — approximates the input token count and adds the max output tokens × the model's per-token rate. If `cumulative_spend + estimate > ceiling × threshold_pct`, it raises `BudgetExceededError` before the network call.
|
|
61
|
+
2. **Post-call commit** — reads the provider's actual usage numbers from the response and records the real cost.
|
|
62
|
+
|
|
63
|
+
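The default threshold is 25% of the ceiling (`threshold_pct=0.25`). Per the check above, a call is blocked as soon as cumulative spend plus that call's estimate would exceed 25% of the ceiling. Consequently no single call can consume more than 25% of your budget — it is an early guardrail against runaway calls, not a hard cap at 100% of the ceiling.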
|
|
64
|
+
|
|
65
|
+
## Supported providers and models
|
|
66
|
+
|
|
67
|
+
| Provider | Client wrapper | Models gated by default |
|
|
68
|
+
| ---------- | -------------------- | ----------------------- |
|
|
69
|
+
| OpenAI | `wrap_openai()` | gpt-4o, gpt-4o-mini, and all models in the pricing config |
|
|
70
|
+
| Anthropic | `wrap_anthropic()` | claude-haiku-4-5, claude-sonnet-4-6, claude-opus-4-6, and all models in the pricing config |
|
|
71
|
+
|
|
72
|
+
## Usage
|
|
73
|
+
|
|
74
|
+
### Basic setup
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from openai import OpenAI
|
|
78
|
+
from spendguard import BudgetExceededError, SpendGuard
|
|
79
|
+
|
|
80
|
+
guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
|
|
81
|
+
client = guard.wrap_openai(OpenAI())
|
|
82
|
+
|
|
83
|
+
try:
|
|
84
|
+
response = client.chat.completions.create(
|
|
85
|
+
model="gpt-4o",
|
|
86
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
87
|
+
max_tokens=512,
|
|
88
|
+
)
|
|
89
|
+
except BudgetExceededError as e:
|
|
90
|
+
print(f"Blocked: {e}")
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Anthropic
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from anthropic import Anthropic
|
|
97
|
+
from spendguard import SpendGuard
|
|
98
|
+
|
|
99
|
+
guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
|
|
100
|
+
client = guard.wrap_anthropic(Anthropic())
|
|
101
|
+
|
|
102
|
+
response = client.messages.create(
|
|
103
|
+
model="claude-sonnet-4-6",
|
|
104
|
+
max_tokens=1024,
|
|
105
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
106
|
+
)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Overriding a block on purpose
|
|
110
|
+
|
|
111
|
+
When you explicitly want to allow a call that would be blocked (e.g., a one-time large batch job), use `track()` with `override=True`:
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
with guard.track(override=True):
|
|
115
|
+
response = client.chat.completions.create(...) # never blocked
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
The override only applies inside the `with` block and does not persist.
|
|
119
|
+
|
|
120
|
+
### Inspecting current spend
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
summary = guard.get_summary()
|
|
124
|
+
# {"ceiling_usd": 20.0, "spent_usd": 1.23, "reserved_usd": 0.0, "threshold_pct": 0.25}
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Workspace isolation
|
|
128
|
+
|
|
129
|
+
Each `SpendGuard` instance is scoped to a `workspace` string. When you run multiple products or feature flags, give each its own workspace so their budgets are tracked independently.
|
|
130
|
+
|
|
131
|
+
## Out of scope for v0.1
|
|
132
|
+
|
|
133
|
+
- Streaming calls (`stream=True`) — explicitly rejected with a clear error.
|
|
134
|
+
- Embeddings, images, audio, and other non-chat/messages endpoints.
|
|
135
|
+
- Persistent spend across process restarts (resets on `SpendGuard()` construction).
|
|
136
|
+
|
|
137
|
+
Persistence and streaming support are planned for v1.0.
|
|
138
|
+
|
|
139
|
+
## Feedback
|
|
140
|
+
|
|
141
|
+
Found a bug or have a feature request? [Open an issue](https://github.com/Rahul-git23/spendguard/issues) — all feedback welcome.
|
|
142
|
+
|
|
143
|
+
## License
|
|
144
|
+
|
|
145
|
+
MIT
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# SpendGuard
|
|
2
|
+
|
|
3
|
+
A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call **before it happens** — no surprises at the end of the month.
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
from spendguard import SpendGuard
|
|
7
|
+
|
|
8
|
+
guard = SpendGuard(workspace="my-app", ceiling_usd=20.0)
|
|
9
|
+
client = guard.wrap_openai(OpenAI()) # or wrap_anthropic(Anthropic())
|
|
10
|
+
|
|
11
|
+
# Call the client exactly as normal — SpendGuard intercepts transparently.
|
|
12
|
+
# If the estimated cost would push cumulative spend past 25% of the $20 ceiling,
|
|
13
|
+
# it raises BudgetExceededError before the API call is made.
|
|
14
|
+
response = client.chat.completions.create(model="gpt-4o", messages=[...])
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install spendguard
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
For more accurate pre-call token counting on OpenAI models:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install spendguard[tiktoken]
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## How it works
|
|
30
|
+
|
|
31
|
+
SpendGuard wraps your existing client object. Every call goes through two steps:
|
|
32
|
+
|
|
33
|
+
1. **Pre-call estimate** — approximates the input token count and adds the max output tokens × the model's per-token rate. If `cumulative_spend + estimate > ceiling × threshold_pct`, it raises `BudgetExceededError` before the network call.
|
|
34
|
+
2. **Post-call commit** — reads the provider's actual usage numbers from the response and records the real cost.
|
|
35
|
+
|
|
36
|
+
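The default threshold is 25% of the ceiling (`threshold_pct=0.25`). Per the check above, a call is blocked as soon as cumulative spend plus that call's estimate would exceed 25% of the ceiling. Consequently no single call can consume more than 25% of your budget — it is an early guardrail against runaway calls, not a hard cap at 100% of the ceiling.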
|
|
37
|
+
|
|
38
|
+
## Supported providers and models
|
|
39
|
+
|
|
40
|
+
| Provider | Client wrapper | Models gated by default |
|
|
41
|
+
| ---------- | -------------------- | ----------------------- |
|
|
42
|
+
| OpenAI | `wrap_openai()` | gpt-4o, gpt-4o-mini, and all models in the pricing config |
|
|
43
|
+
| Anthropic | `wrap_anthropic()` | claude-haiku-4-5, claude-sonnet-4-6, claude-opus-4-6, and all models in the pricing config |
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
|
|
47
|
+
### Basic setup
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from openai import OpenAI
|
|
51
|
+
from spendguard import BudgetExceededError, SpendGuard
|
|
52
|
+
|
|
53
|
+
guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
|
|
54
|
+
client = guard.wrap_openai(OpenAI())
|
|
55
|
+
|
|
56
|
+
try:
|
|
57
|
+
response = client.chat.completions.create(
|
|
58
|
+
model="gpt-4o",
|
|
59
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
60
|
+
max_tokens=512,
|
|
61
|
+
)
|
|
62
|
+
except BudgetExceededError as e:
|
|
63
|
+
print(f"Blocked: {e}")
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Anthropic
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from anthropic import Anthropic
|
|
70
|
+
from spendguard import SpendGuard
|
|
71
|
+
|
|
72
|
+
guard = SpendGuard(workspace="my-product", ceiling_usd=20.0)
|
|
73
|
+
client = guard.wrap_anthropic(Anthropic())
|
|
74
|
+
|
|
75
|
+
response = client.messages.create(
|
|
76
|
+
model="claude-sonnet-4-6",
|
|
77
|
+
max_tokens=1024,
|
|
78
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
79
|
+
)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Overriding a block on purpose
|
|
83
|
+
|
|
84
|
+
When you explicitly want to allow a call that would be blocked (e.g., a one-time large batch job), use `track()` with `override=True`:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
with guard.track(override=True):
|
|
88
|
+
response = client.chat.completions.create(...) # never blocked
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The override only applies inside the `with` block and does not persist.
|
|
92
|
+
|
|
93
|
+
### Inspecting current spend
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
summary = guard.get_summary()
|
|
97
|
+
# {"ceiling_usd": 20.0, "spent_usd": 1.23, "reserved_usd": 0.0, "threshold_pct": 0.25}
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Workspace isolation
|
|
101
|
+
|
|
102
|
+
Each `SpendGuard` instance is scoped to a `workspace` string. When you run multiple products or feature flags, give each its own workspace so their budgets are tracked independently.
|
|
103
|
+
|
|
104
|
+
## Out of scope for v0.1
|
|
105
|
+
|
|
106
|
+
- Streaming calls (`stream=True`) — explicitly rejected with a clear error.
|
|
107
|
+
- Embeddings, images, audio, and other non-chat/messages endpoints.
|
|
108
|
+
- Persistent spend across process restarts (resets on `SpendGuard()` construction).
|
|
109
|
+
|
|
110
|
+
Persistence and streaming support are planned for v1.0.
|
|
111
|
+
|
|
112
|
+
## Feedback
|
|
113
|
+
|
|
114
|
+
Found a bug or have a feature request? [Open an issue](https://github.com/Rahul-git23/spendguard/issues) — all feedback welcome.
|
|
115
|
+
|
|
116
|
+
## License
|
|
117
|
+
|
|
118
|
+
MIT
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "spendguard"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A 2-line wrapper around your OpenAI or Anthropic client that blocks an over-budget API call before it happens."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Rahul Vichare", email = "rahulvichare@gmail.com" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["openai", "anthropic", "llm", "cost", "budget", "guardrail", "ai"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
26
|
+
]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"openai>=1.0.0",
|
|
29
|
+
"anthropic>=0.25.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
dev = ["pytest>=7.0.0"]
|
|
34
|
+
tiktoken = ["tiktoken>=0.5.0"]
|
|
35
|
+
|
|
36
|
+
[project.urls]
|
|
37
|
+
Homepage = "https://github.com/Rahul-git23/spendguard"
|
|
38
|
+
Repository = "https://github.com/Rahul-git23/spendguard"
|
|
39
|
+
|
|
40
|
+
[tool.hatch.build.targets.wheel]
|
|
41
|
+
packages = ["src/spendguard"]
|
|
42
|
+
|
|
43
|
+
[tool.pytest.ini_options]
|
|
44
|
+
testpaths = ["tests"]
|
|
45
|
+
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""spendguard -- blocks an over-budget LLM API call before it happens.
|
|
2
|
+
|
|
3
|
+
Build status: Stage 4 (core platform build), matching README.md's quickstart.
|
|
4
|
+
SpendGuard.wrap_openai() / wrap_anthropic() / track() are implemented and
|
|
5
|
+
gate client.chat.completions.create() / client.messages.create() respectively
|
|
6
|
+
-- every other client attribute (embeddings, models, ...) and streaming calls
|
|
7
|
+
(stream=True) are explicitly out of scope for this MVP wrapper, not silently
|
|
8
|
+
mishandled. Pricing data in config/ is placeholder, not verified current
|
|
9
|
+
rates -- see cost/pricing.py.
|
|
10
|
+
"""
|
|
11
|
+
from .exceptions import BudgetError, BudgetExceededError, PricingDataError
|
|
12
|
+
from .tracker import SpendTracker
|
|
13
|
+
from .cost import CostCalculator, CostEstimator, ModelPrice, PricingTable
|
|
14
|
+
from .providers import AnthropicProvider, OpenAIProvider, Provider, Usage
|
|
15
|
+
from .session import SpendGuard
|
|
16
|
+
from .wrappers import AnthropicClientWrapper, OpenAIClientWrapper
|
|
17
|
+
|
|
18
|
+
__version__ = "0.1.0"
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"SpendGuard",
|
|
22
|
+
"SpendTracker",
|
|
23
|
+
"BudgetError",
|
|
24
|
+
"BudgetExceededError",
|
|
25
|
+
"PricingDataError",
|
|
26
|
+
"CostCalculator",
|
|
27
|
+
"CostEstimator",
|
|
28
|
+
"ModelPrice",
|
|
29
|
+
"PricingTable",
|
|
30
|
+
"Provider",
|
|
31
|
+
"Usage",
|
|
32
|
+
"OpenAIProvider",
|
|
33
|
+
"AnthropicProvider",
|
|
34
|
+
"OpenAIClientWrapper",
|
|
35
|
+
"AnthropicClientWrapper",
|
|
36
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_note": "Pricing last verified 2026-06-25 against anthropic.com/pricing. Update _version_date and rates when Anthropic publishes a price change.",
|
|
3
|
+
"_version_date": "2026-06-25",
|
|
4
|
+
"claude-haiku-4-5": {"input_per_million": 1.00, "output_per_million": 5.00},
|
|
5
|
+
"claude-sonnet-4-6": {"input_per_million": 3.00, "output_per_million": 15.00},
|
|
6
|
+
"claude-opus-4-6": {"input_per_million": 15.00, "output_per_million": 75.00}
|
|
7
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_note": "Pricing last verified 2026-06-25 against openai.com/api/pricing. Update _version_date and rates when OpenAI publishes a price change.",
|
|
3
|
+
"_version_date": "2026-06-25",
|
|
4
|
+
"gpt-4o-mini": {"input_per_million": 0.15, "output_per_million": 0.60},
|
|
5
|
+
"gpt-4o": {"input_per_million": 2.50, "output_per_million": 10.00},
|
|
6
|
+
"gpt-4.1-mini": {"input_per_million": 0.40, "output_per_million": 1.60}
|
|
7
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Thread-local override state shared between SpendGuard.track() and the
|
|
2
|
+
provider wrappers it gates.
|
|
3
|
+
|
|
4
|
+
`with guard.track(override=True):` has to affect only calls made on the
|
|
5
|
+
current thread inside that block, not every thread sharing the same
|
|
6
|
+
SpendGuard -- otherwise one thread's override would silently apply to
|
|
7
|
+
another's concurrent call.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import threading
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class OverrideContext:
    """Per-thread stack of override flags.

    The stack lives on a ``threading.local`` object, so a flag pushed on one
    thread is never visible to a call made on another thread.
    """

    def __init__(self) -> None:
        self._local = threading.local()

    def push(self, override: bool) -> None:
        """Push ``override`` onto the calling thread's stack, creating the stack on first use."""
        local = self._local
        if getattr(local, "stack", None) is None:
            local.stack = []
        local.stack.append(override)

    def pop(self) -> None:
        """Discard the most recently pushed flag on the calling thread."""
        flags = self._local.stack
        flags.pop()

    def current(self) -> bool:
        """Return the innermost flag for the calling thread, or ``False`` when none is active."""
        flags = getattr(self._local, "stack", None)
        if not flags:
            # No stack yet on this thread, or every pushed flag has been popped.
            return False
        return flags[-1]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TrackContext:
    """Returned by SpendGuard.track() -- see README.md's "Overriding a block on purpose".

    Entering the context pushes the requested override flag onto the given
    OverrideContext; leaving it pops that flag again.
    """

    def __init__(self, override_context: OverrideContext, override: bool) -> None:
        self._override = override
        self._override_context = override_context

    def __enter__(self) -> "TrackContext":
        """Activate the override flag for the calling thread and return this context."""
        self._override_context.push(self._override)
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:
        """Deactivate the flag pushed by ``__enter__``."""
        self._override_context.pop()
        # Returning False lets any exception raised inside the block propagate.
        return False
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""CostCalculator -- turns real, post-call token usage into an actual dollar cost.
|
|
2
|
+
|
|
3
|
+
Always the source of truth recorded into SpendTracker.commit() -- never the
|
|
4
|
+
pre-call estimate, once the provider's real usage numbers are known.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from ..providers.base import Usage
|
|
9
|
+
from .pricing import PricingTable
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CostCalculator:
    """Converts post-call token usage into a dollar cost using a pricing table."""

    def __init__(self, pricing: PricingTable) -> None:
        self._pricing = pricing

    def actual_cost_usd(self, provider: str, model: str, usage: Usage) -> float:
        """Return the cost of ``usage`` for ``model`` at the table's per-million rates.

        Any error raised by the pricing table's ``get_price`` lookup propagates
        to the caller unchanged.
        """
        rates = self._pricing.get_price(provider, model)
        input_cost = usage.input_tokens / 1_000_000 * rates.input_per_million
        output_cost = usage.output_tokens / 1_000_000 * rates.output_per_million
        return input_cost + output_cost
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""CostEstimator -- pre-call cost estimate from a prompt and max output size.
|
|
2
|
+
|
|
3
|
+
Zero required dependency: input tokens are approximated at ~4 characters per
|
|
4
|
+
token unless tiktoken is installed (pip install spendguard[tiktoken]), in
|
|
5
|
+
which case OpenAI prompts get exact cl100k_base counts. The estimate only has
|
|
6
|
+
to be close enough to gate correctly -- CostCalculator always recomputes the
|
|
7
|
+
real cost from the provider's own usage numbers after the call resolves.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from .pricing import PricingTable
|
|
12
|
+
|
|
13
|
+
CHARS_PER_TOKEN_APPROX = 4
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
import tiktoken
|
|
17
|
+
|
|
18
|
+
_ENCODING = tiktoken.get_encoding("cl100k_base")
|
|
19
|
+
except ImportError:
|
|
20
|
+
_ENCODING = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _count_input_tokens(prompt_text: str, provider: str) -> int:
|
|
24
|
+
if _ENCODING is not None and provider == "openai":
|
|
25
|
+
return max(1, len(_ENCODING.encode(prompt_text)))
|
|
26
|
+
return max(1, len(prompt_text) // CHARS_PER_TOKEN_APPROX)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CostEstimator:
    """Produces a pre-call cost estimate from prompt text and a maximum output size."""

    def __init__(self, pricing: PricingTable) -> None:
        self._pricing = pricing

    def estimate_usd(
        self,
        provider: str,
        model: str,
        prompt_text: str,
        max_output_tokens: int,
    ) -> float:
        """Return the estimated cost of one call.

        The input side is priced from an approximate token count of
        ``prompt_text``; the output side assumes all ``max_output_tokens``
        are used. Errors from the pricing lookup propagate unchanged.
        """
        # Look the price up first so a pricing failure surfaces before any
        # token counting is done.
        rates = self._pricing.get_price(provider, model)
        prompt_tokens = _count_input_tokens(prompt_text, provider)
        input_cost = prompt_tokens / 1_000_000 * rates.input_per_million
        output_cost = max_output_tokens / 1_000_000 * rates.output_per_million
        return input_cost + output_cost
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""PricingTable -- loads per-provider model pricing from config/pricing_<provider>.json.
|
|
2
|
+
|
|
3
|
+
Adding a new provider's prices later is a new config/pricing_<provider>.json
|
|
4
|
+
file, not a code change here. Keys starting with "_" (e.g. "_note") are
|
|
5
|
+
metadata, not models, and are skipped when loading.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import warnings
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from datetime import date, datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Dict, Optional
|
|
16
|
+
|
|
17
|
+
from ..exceptions import PricingDataError
|
|
18
|
+
|
|
19
|
+
_STALENESS_DAYS = 90
|
|
20
|
+
_log = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
DEFAULT_CONFIG_DIR = Path(__file__).resolve().parent.parent / "config"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class ModelPrice:
    """Immutable pair of per-million-token rates for a single model."""

    # Rate applied to input tokens, per one million tokens.
    input_per_million: float
    # Rate applied to output tokens, per one million tokens.
    output_per_million: float
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PricingTable:
    """Per-provider model prices, loaded lazily from ``pricing_<provider>.json``.

    Each provider's file is read at most once per instance and then served
    from an in-memory cache. Top-level keys starting with "_" are metadata
    and are not treated as models.
    """

    def __init__(self, config_dir: Optional[Path] = None) -> None:
        """Read pricing files from ``config_dir``, or from the default config directory when omitted."""
        self._config_dir = config_dir if config_dir is not None else DEFAULT_CONFIG_DIR
        self._cache: Dict[str, Dict[str, ModelPrice]] = {}

    def _load_provider(self, provider: str) -> Dict[str, ModelPrice]:
        """Return the model -> price mapping for ``provider``, loading it on first use.

        Emits a warning when the file's ``_version_date`` is more than
        ``_STALENESS_DAYS`` days old.

        Raises:
            PricingDataError: if the provider's file is missing, cannot be
                parsed as a JSON object, or contains a malformed model entry.
        """
        if provider in self._cache:
            return self._cache[provider]

        path = self._config_dir / f"pricing_{provider}.json"
        if not path.exists():
            raise PricingDataError(f"no pricing config for provider '{provider}' (looked for {path})")

        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
        except ValueError as err:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError
            # subclasses; report them as a pricing-data problem.
            raise PricingDataError(f"pricing config {path} could not be parsed: {err}") from err
        if not isinstance(raw, dict):
            raise PricingDataError(f"pricing config {path} must contain a JSON object at the top level")

        version_date_str = raw.get("_version_date")
        if version_date_str:
            try:
                version_date = datetime.strptime(version_date_str, "%Y-%m-%d").date()
            except (TypeError, ValueError):
                # TypeError covers a non-string value (e.g. a bare number);
                # the staleness check is advisory, so never fail the load.
                _log.debug("Could not parse _version_date '%s' in pricing_%s.json", version_date_str, provider)
            else:
                age_days = (date.today() - version_date).days
                if age_days > _STALENESS_DAYS:
                    warnings.warn(
                        f"SpendGuard: {provider} pricing data is {age_days} days old "
                        f"(last verified {version_date_str}). Cost estimates may be inaccurate "
                        f"if {provider} has changed their prices. Update config/pricing_{provider}.json "
                        f"or pass a custom config_dir to PricingTable().",
                        # Attribute the warning to the caller of get_price().
                        stacklevel=3,
                    )

        prices: Dict[str, ModelPrice] = {}
        for model, entry in raw.items():
            if model.startswith("_"):
                continue  # metadata key such as "_note", not a model
            try:
                prices[model] = ModelPrice(
                    input_per_million=entry["input_per_million"],
                    output_per_million=entry["output_per_million"],
                )
            except (KeyError, TypeError) as err:
                # Missing rate key, or the entry is not a JSON object.
                raise PricingDataError(
                    f"malformed pricing entry for model '{model}' in {path}: "
                    "expected 'input_per_million' and 'output_per_million'"
                ) from err

        self._cache[provider] = prices
        return prices

    def get_price(self, provider: str, model: str) -> ModelPrice:
        """Return the price for ``model`` under ``provider``.

        Raises:
            PricingDataError: if the provider has no usable pricing file or
                the model is not listed in it.
        """
        prices = self._load_provider(provider)
        if model not in prices:
            raise PricingDataError(f"unknown model '{model}' for provider '{provider}'")
        return prices[model]
|