streamctx 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- streamctx-0.3.0/LICENSE +21 -0
- streamctx-0.3.0/PKG-INFO +205 -0
- streamctx-0.3.0/README.md +163 -0
- streamctx-0.3.0/setup.cfg +4 -0
- streamctx-0.3.0/setup.py +43 -0
- streamctx-0.3.0/src/streamctx.egg-info/PKG-INFO +205 -0
- streamctx-0.3.0/src/streamctx.egg-info/SOURCES.txt +15 -0
- streamctx-0.3.0/src/streamctx.egg-info/dependency_links.txt +1 -0
- streamctx-0.3.0/src/streamctx.egg-info/requires.txt +11 -0
- streamctx-0.3.0/src/streamctx.egg-info/top_level.txt +1 -0
- streamctx-0.3.0/tests/test_checkpoint.py +49 -0
- streamctx-0.3.0/tests/test_compression.py +60 -0
- streamctx-0.3.0/tests/test_diff.py +65 -0
- streamctx-0.3.0/tests/test_healing.py +74 -0
- streamctx-0.3.0/tests/test_openai_integration.py +138 -0
- streamctx-0.3.0/tests/test_poison.py +59 -0
- streamctx-0.3.0/tests/test_streaming.py +160 -0
streamctx-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sneh R Joshi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
streamctx-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: streamctx
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Context health monitoring for AI agents — detect poisoning, drift, loops
|
|
5
|
+
Home-page: https://github.com/streamctx/streamctx
|
|
6
|
+
Author: Sneh R Joshi
|
|
7
|
+
Author-email: joshisneh51@gmail.com
|
|
8
|
+
Keywords: llm,ai,agent,context,monitoring,observability,openai,anthropic,token,checkpoint,compression,self-healing
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: rich>=13.0.0
|
|
23
|
+
Provides-Extra: openai
|
|
24
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
25
|
+
Provides-Extra: anthropic
|
|
26
|
+
Requires-Dist: anthropic>=0.25.0; extra == "anthropic"
|
|
27
|
+
Provides-Extra: all
|
|
28
|
+
Requires-Dist: openai>=1.0.0; extra == "all"
|
|
29
|
+
Requires-Dist: anthropic>=0.25.0; extra == "all"
|
|
30
|
+
Dynamic: author
|
|
31
|
+
Dynamic: author-email
|
|
32
|
+
Dynamic: classifier
|
|
33
|
+
Dynamic: description
|
|
34
|
+
Dynamic: description-content-type
|
|
35
|
+
Dynamic: home-page
|
|
36
|
+
Dynamic: keywords
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
Dynamic: provides-extra
|
|
39
|
+
Dynamic: requires-dist
|
|
40
|
+
Dynamic: requires-python
|
|
41
|
+
Dynamic: summary
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# StreamCtx 🧠
|
|
45
|
+
|
|
46
|
+
**Your AI agent is silently corrupting its own context. StreamCtx detects it — and fixes it.**
|
|
47
|
+
|
|
48
|
+
## Install
|
|
49
|
+
|
|
50
|
+
pip install streamctx
|
|
51
|
+
|
|
52
|
+
## 2-Line Setup
|
|
53
|
+
|
|
54
|
+
import streamctx
|
|
55
|
+
streamctx.start() # patches OpenAI + Anthropic automatically
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## The Problem Nobody Talks About
|
|
60
|
+
|
|
61
|
+
You ship an AI agent. It works perfectly in demos.
|
|
62
|
+
|
|
63
|
+
Then in production:
|
|
64
|
+
- Agent gets stuck repeating the same failed action 58 times
|
|
65
|
+
- Context from step 3 contradicts context from step 7
|
|
66
|
+
- Agent hallucinates a tool call, writes it to memory, references it forever
|
|
67
|
+
- Your $0.50 task costs $50 because nobody set a limit
|
|
68
|
+
|
|
69
|
+
Every LLM observability tool tracks tokens. Nobody tracks context health.
|
|
70
|
+
|
|
71
|
+
Until now.
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## What StreamCtx Does
|
|
76
|
+
|
|
77
|
+
### 1. Context Poison Detection
|
|
78
|
+
|
|
79
|
+
result = streamctx.scan(messages)
|
|
80
|
+
print(result["health_score"]) # 25/100
|
|
81
|
+
print(result["warnings"])
|
|
82
|
+
# ⚠️ Repeated errors: 'failed' 4x — agent stuck in loop
|
|
83
|
+
# 🚨 Context severely poisoned — resume from checkpoint
|
|
84
|
+
|
|
85
|
+
### 2. Context Diff — See Exactly What Changed
|
|
86
|
+
|
|
87
|
+
diff = streamctx.context_diff(step3_msgs, step7_msgs, step_a=3, step_b=7)
|
|
88
|
+
print(diff["summary"])
|
|
89
|
+
# ⚠️ System prompt REMOVED — agent lost instructions
|
|
90
|
+
# ⚠️ Contradiction: 'use gpt' added but 'use claude' removed
|
|
91
|
+
# Drift Score: 50/100
|
|
92
|
+
|
|
93
|
+
### 3. Auto-Checkpoint + Resume
|
|
94
|
+
|
|
95
|
+
session_id = streamctx.get_session_id()
|
|
96
|
+
messages = streamctx.resume(session_id)
|
|
97
|
+
# Pick up exactly where agent left off
|
|
98
|
+
|
|
99
|
+
### 4. 50% Token Compression
|
|
100
|
+
|
|
101
|
+
result = streamctx.compress(messages, max_tokens=2000)
|
|
102
|
+
# 140 tokens → 70 tokens (50% reduction)
|
|
103
|
+
|
|
104
|
+
### 5. Self-Healing
|
|
105
|
+
|
|
106
|
+
stats = streamctx.healing_stats()
|
|
107
|
+
# failures: 1, recoveries: 1
|
|
108
|
+
|
|
109
|
+
### 6. Full Session Report
|
|
110
|
+
|
|
111
|
+
streamctx.report()
|
|
112
|
+
streamctx.stop()
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Feature Comparison
|
|
117
|
+
|
|
118
|
+
Feature | StreamCtx | Langfuse | LangSmith | Mem0
|
|
119
|
+
---------------------|-----------|----------|-----------|-----
|
|
120
|
+
Token tracking | YES | YES | YES | NO
|
|
121
|
+
Cost estimation | YES | YES | YES | NO
|
|
122
|
+
Context Poison Det. | YES | NO | NO | NO
|
|
123
|
+
Context Diff | YES | NO | NO | NO
|
|
124
|
+
Auto-checkpoint | YES | NO | NO | NO
|
|
125
|
+
50% Compression | YES | NO | NO | NO
|
|
126
|
+
Self-healing | YES | NO | NO | NO
|
|
127
|
+
Zero config | YES | NO | NO | NO
|
|
128
|
+
Open source | YES | YES | NO | NO
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Quick Start
|
|
133
|
+
|
|
134
|
+
import streamctx
|
|
135
|
+
from openai import OpenAI
|
|
136
|
+
|
|
137
|
+
streamctx.start()
|
|
138
|
+
client = OpenAI()
|
|
139
|
+
|
|
140
|
+
messages = [{"role": "user", "content": "Hello!"}]
|
|
141
|
+
response = client.chat.completions.create(
|
|
142
|
+
model="gpt-4o-mini",
|
|
143
|
+
messages=messages,
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
result = streamctx.scan(messages)
|
|
147
|
+
print(result["health_score"])
|
|
148
|
+
print(result["recommendation"])
|
|
149
|
+
|
|
150
|
+
streamctx.report()
|
|
151
|
+
streamctx.stop()
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## API Reference
|
|
156
|
+
|
|
157
|
+
streamctx.start() # start tracking
|
|
158
|
+
streamctx.stop() # stop tracking
|
|
159
|
+
streamctx.report() # print full report
|
|
160
|
+
streamctx.wrap(client) # manually wrap client
|
|
161
|
+
|
|
162
|
+
streamctx.scan(messages) # context health score
|
|
163
|
+
streamctx.context_diff(a, b) # compare two steps
|
|
164
|
+
|
|
165
|
+
streamctx.checkpoint() # save checkpoint
|
|
166
|
+
streamctx.resume(session_id) # resume from checkpoint
|
|
167
|
+
streamctx.get_session_id() # current session ID
|
|
168
|
+
|
|
169
|
+
streamctx.compress(messages) # 50% token compression
|
|
170
|
+
streamctx.healing_stats() # self-healing stats
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Why StreamCtx?
|
|
175
|
+
|
|
176
|
+
Most tools answer: "How many tokens did I use?"
|
|
177
|
+
|
|
178
|
+
StreamCtx answers: "Why is my agent broken — and how do I fix it?"
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## Roadmap
|
|
183
|
+
|
|
184
|
+
DONE:
|
|
185
|
+
- Token tracking + cost estimation
|
|
186
|
+
- Context poison detection
|
|
187
|
+
- Context diff + drift scoring
|
|
188
|
+
- Auto-checkpoint + resume
|
|
189
|
+
- 50% token compression
|
|
190
|
+
- Self-healing engine
|
|
191
|
+
|
|
192
|
+
COMING:
|
|
193
|
+
- Context budget manager (v0.4.0)
|
|
194
|
+
- Visual dashboard
|
|
195
|
+
- Multi-agent support
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## License
|
|
200
|
+
|
|
201
|
+
MIT - Sneh R Joshi
|
|
202
|
+
|
|
203
|
+
Built by a solo founder who got tired of AI agents silently going insane.
|
|
204
|
+
|
|
205
|
+
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
|
|
2
|
+
# StreamCtx 🧠
|
|
3
|
+
|
|
4
|
+
**Your AI agent is silently corrupting its own context. StreamCtx detects it — and fixes it.**
|
|
5
|
+
|
|
6
|
+
## Install
|
|
7
|
+
|
|
8
|
+
pip install streamctx
|
|
9
|
+
|
|
10
|
+
## 2-Line Setup
|
|
11
|
+
|
|
12
|
+
import streamctx
|
|
13
|
+
streamctx.start() # patches OpenAI + Anthropic automatically
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## The Problem Nobody Talks About
|
|
18
|
+
|
|
19
|
+
You ship an AI agent. It works perfectly in demos.
|
|
20
|
+
|
|
21
|
+
Then in production:
|
|
22
|
+
- Agent gets stuck repeating the same failed action 58 times
|
|
23
|
+
- Context from step 3 contradicts context from step 7
|
|
24
|
+
- Agent hallucinates a tool call, writes it to memory, references it forever
|
|
25
|
+
- Your $0.50 task costs $50 because nobody set a limit
|
|
26
|
+
|
|
27
|
+
Every LLM observability tool tracks tokens. Nobody tracks context health.
|
|
28
|
+
|
|
29
|
+
Until now.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## What StreamCtx Does
|
|
34
|
+
|
|
35
|
+
### 1. Context Poison Detection
|
|
36
|
+
|
|
37
|
+
result = streamctx.scan(messages)
|
|
38
|
+
print(result["health_score"]) # 25/100
|
|
39
|
+
print(result["warnings"])
|
|
40
|
+
# ⚠️ Repeated errors: 'failed' 4x — agent stuck in loop
|
|
41
|
+
# 🚨 Context severely poisoned — resume from checkpoint
|
|
42
|
+
|
|
43
|
+
### 2. Context Diff — See Exactly What Changed
|
|
44
|
+
|
|
45
|
+
diff = streamctx.context_diff(step3_msgs, step7_msgs, step_a=3, step_b=7)
|
|
46
|
+
print(diff["summary"])
|
|
47
|
+
# ⚠️ System prompt REMOVED — agent lost instructions
|
|
48
|
+
# ⚠️ Contradiction: 'use gpt' added but 'use claude' removed
|
|
49
|
+
# Drift Score: 50/100
|
|
50
|
+
|
|
51
|
+
### 3. Auto-Checkpoint + Resume
|
|
52
|
+
|
|
53
|
+
session_id = streamctx.get_session_id()
|
|
54
|
+
messages = streamctx.resume(session_id)
|
|
55
|
+
# Pick up exactly where agent left off
|
|
56
|
+
|
|
57
|
+
### 4. 50% Token Compression
|
|
58
|
+
|
|
59
|
+
result = streamctx.compress(messages, max_tokens=2000)
|
|
60
|
+
# 140 tokens → 70 tokens (50% reduction)
|
|
61
|
+
|
|
62
|
+
### 5. Self-Healing
|
|
63
|
+
|
|
64
|
+
stats = streamctx.healing_stats()
|
|
65
|
+
# failures: 1, recoveries: 1
|
|
66
|
+
|
|
67
|
+
### 6. Full Session Report
|
|
68
|
+
|
|
69
|
+
streamctx.report()
|
|
70
|
+
streamctx.stop()
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Feature Comparison
|
|
75
|
+
|
|
76
|
+
Feature | StreamCtx | Langfuse | LangSmith | Mem0
|
|
77
|
+
---------------------|-----------|----------|-----------|-----
|
|
78
|
+
Token tracking | YES | YES | YES | NO
|
|
79
|
+
Cost estimation | YES | YES | YES | NO
|
|
80
|
+
Context Poison Det. | YES | NO | NO | NO
|
|
81
|
+
Context Diff | YES | NO | NO | NO
|
|
82
|
+
Auto-checkpoint | YES | NO | NO | NO
|
|
83
|
+
50% Compression | YES | NO | NO | NO
|
|
84
|
+
Self-healing | YES | NO | NO | NO
|
|
85
|
+
Zero config | YES | NO | NO | NO
|
|
86
|
+
Open source | YES | YES | NO | NO
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Quick Start
|
|
91
|
+
|
|
92
|
+
import streamctx
|
|
93
|
+
from openai import OpenAI
|
|
94
|
+
|
|
95
|
+
streamctx.start()
|
|
96
|
+
client = OpenAI()
|
|
97
|
+
|
|
98
|
+
messages = [{"role": "user", "content": "Hello!"}]
|
|
99
|
+
response = client.chat.completions.create(
|
|
100
|
+
model="gpt-4o-mini",
|
|
101
|
+
messages=messages,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
result = streamctx.scan(messages)
|
|
105
|
+
print(result["health_score"])
|
|
106
|
+
print(result["recommendation"])
|
|
107
|
+
|
|
108
|
+
streamctx.report()
|
|
109
|
+
streamctx.stop()
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## API Reference
|
|
114
|
+
|
|
115
|
+
streamctx.start() # start tracking
|
|
116
|
+
streamctx.stop() # stop tracking
|
|
117
|
+
streamctx.report() # print full report
|
|
118
|
+
streamctx.wrap(client) # manually wrap client
|
|
119
|
+
|
|
120
|
+
streamctx.scan(messages) # context health score
|
|
121
|
+
streamctx.context_diff(a, b) # compare two steps
|
|
122
|
+
|
|
123
|
+
streamctx.checkpoint() # save checkpoint
|
|
124
|
+
streamctx.resume(session_id) # resume from checkpoint
|
|
125
|
+
streamctx.get_session_id() # current session ID
|
|
126
|
+
|
|
127
|
+
streamctx.compress(messages) # 50% token compression
|
|
128
|
+
streamctx.healing_stats() # self-healing stats
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Why StreamCtx?
|
|
133
|
+
|
|
134
|
+
Most tools answer: "How many tokens did I use?"
|
|
135
|
+
|
|
136
|
+
StreamCtx answers: "Why is my agent broken — and how do I fix it?"
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Roadmap
|
|
141
|
+
|
|
142
|
+
DONE:
|
|
143
|
+
- Token tracking + cost estimation
|
|
144
|
+
- Context poison detection
|
|
145
|
+
- Context diff + drift scoring
|
|
146
|
+
- Auto-checkpoint + resume
|
|
147
|
+
- 50% token compression
|
|
148
|
+
- Self-healing engine
|
|
149
|
+
|
|
150
|
+
COMING:
|
|
151
|
+
- Context budget manager (v0.4.0)
|
|
152
|
+
- Visual dashboard
|
|
153
|
+
- Multi-agent support
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## License
|
|
158
|
+
|
|
159
|
+
MIT - Sneh R Joshi
|
|
160
|
+
|
|
161
|
+
Built by a solo founder who got tired of AI agents silently going insane.
|
|
162
|
+
|
|
163
|
+
|
streamctx-0.3.0/setup.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from setuptools import find_packages, setup
|
|
2
|
+
|
|
3
|
+
with open("README.md", encoding="utf-8") as f:
|
|
4
|
+
long_description = f.read()
|
|
5
|
+
|
|
6
|
+
setup(
|
|
7
|
+
name="streamctx",
|
|
8
|
+
version="0.3.0",
|
|
9
|
+
description="Context health monitoring for AI agents — detect poisoning, drift, loops",
|
|
10
|
+
long_description=long_description,
|
|
11
|
+
long_description_content_type="text/markdown",
|
|
12
|
+
author="Sneh R Joshi",
|
|
13
|
+
author_email="joshisneh51@gmail.com",
|
|
14
|
+
url="https://github.com/streamctx/streamctx",
|
|
15
|
+
packages=find_packages(where="src"),
|
|
16
|
+
package_dir={"": "src"},
|
|
17
|
+
python_requires=">=3.9",
|
|
18
|
+
install_requires=[
|
|
19
|
+
"rich>=13.0.0",
|
|
20
|
+
],
|
|
21
|
+
extras_require={
|
|
22
|
+
"openai": ["openai>=1.0.0"],
|
|
23
|
+
"anthropic": ["anthropic>=0.25.0"],
|
|
24
|
+
"all": ["openai>=1.0.0", "anthropic>=0.25.0"],
|
|
25
|
+
},
|
|
26
|
+
classifiers=[
|
|
27
|
+
"Development Status :: 4 - Beta",
|
|
28
|
+
"Intended Audience :: Developers",
|
|
29
|
+
"License :: OSI Approved :: MIT License",
|
|
30
|
+
"Programming Language :: Python :: 3",
|
|
31
|
+
"Programming Language :: Python :: 3.9",
|
|
32
|
+
"Programming Language :: Python :: 3.10",
|
|
33
|
+
"Programming Language :: Python :: 3.11",
|
|
34
|
+
"Programming Language :: Python :: 3.12",
|
|
35
|
+
"Topic :: Software Development :: Libraries",
|
|
36
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
37
|
+
],
|
|
38
|
+
keywords=[
|
|
39
|
+
"llm", "ai", "agent", "context", "monitoring",
|
|
40
|
+
"observability", "openai", "anthropic", "token",
|
|
41
|
+
"checkpoint", "compression", "self-healing",
|
|
42
|
+
],
|
|
43
|
+
)
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: streamctx
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Context health monitoring for AI agents — detect poisoning, drift, loops
|
|
5
|
+
Home-page: https://github.com/streamctx/streamctx
|
|
6
|
+
Author: Sneh R Joshi
|
|
7
|
+
Author-email: joshisneh51@gmail.com
|
|
8
|
+
Keywords: llm,ai,agent,context,monitoring,observability,openai,anthropic,token,checkpoint,compression,self-healing
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: rich>=13.0.0
|
|
23
|
+
Provides-Extra: openai
|
|
24
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
25
|
+
Provides-Extra: anthropic
|
|
26
|
+
Requires-Dist: anthropic>=0.25.0; extra == "anthropic"
|
|
27
|
+
Provides-Extra: all
|
|
28
|
+
Requires-Dist: openai>=1.0.0; extra == "all"
|
|
29
|
+
Requires-Dist: anthropic>=0.25.0; extra == "all"
|
|
30
|
+
Dynamic: author
|
|
31
|
+
Dynamic: author-email
|
|
32
|
+
Dynamic: classifier
|
|
33
|
+
Dynamic: description
|
|
34
|
+
Dynamic: description-content-type
|
|
35
|
+
Dynamic: home-page
|
|
36
|
+
Dynamic: keywords
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
Dynamic: provides-extra
|
|
39
|
+
Dynamic: requires-dist
|
|
40
|
+
Dynamic: requires-python
|
|
41
|
+
Dynamic: summary
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# StreamCtx 🧠
|
|
45
|
+
|
|
46
|
+
**Your AI agent is silently corrupting its own context. StreamCtx detects it — and fixes it.**
|
|
47
|
+
|
|
48
|
+
## Install
|
|
49
|
+
|
|
50
|
+
pip install streamctx
|
|
51
|
+
|
|
52
|
+
## 2-Line Setup
|
|
53
|
+
|
|
54
|
+
import streamctx
|
|
55
|
+
streamctx.start() # patches OpenAI + Anthropic automatically
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## The Problem Nobody Talks About
|
|
60
|
+
|
|
61
|
+
You ship an AI agent. It works perfectly in demos.
|
|
62
|
+
|
|
63
|
+
Then in production:
|
|
64
|
+
- Agent gets stuck repeating the same failed action 58 times
|
|
65
|
+
- Context from step 3 contradicts context from step 7
|
|
66
|
+
- Agent hallucinates a tool call, writes it to memory, references it forever
|
|
67
|
+
- Your $0.50 task costs $50 because nobody set a limit
|
|
68
|
+
|
|
69
|
+
Every LLM observability tool tracks tokens. Nobody tracks context health.
|
|
70
|
+
|
|
71
|
+
Until now.
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## What StreamCtx Does
|
|
76
|
+
|
|
77
|
+
### 1. Context Poison Detection
|
|
78
|
+
|
|
79
|
+
result = streamctx.scan(messages)
|
|
80
|
+
print(result["health_score"]) # 25/100
|
|
81
|
+
print(result["warnings"])
|
|
82
|
+
# ⚠️ Repeated errors: 'failed' 4x — agent stuck in loop
|
|
83
|
+
# 🚨 Context severely poisoned — resume from checkpoint
|
|
84
|
+
|
|
85
|
+
### 2. Context Diff — See Exactly What Changed
|
|
86
|
+
|
|
87
|
+
diff = streamctx.context_diff(step3_msgs, step7_msgs, step_a=3, step_b=7)
|
|
88
|
+
print(diff["summary"])
|
|
89
|
+
# ⚠️ System prompt REMOVED — agent lost instructions
|
|
90
|
+
# ⚠️ Contradiction: 'use gpt' added but 'use claude' removed
|
|
91
|
+
# Drift Score: 50/100
|
|
92
|
+
|
|
93
|
+
### 3. Auto-Checkpoint + Resume
|
|
94
|
+
|
|
95
|
+
session_id = streamctx.get_session_id()
|
|
96
|
+
messages = streamctx.resume(session_id)
|
|
97
|
+
# Pick up exactly where agent left off
|
|
98
|
+
|
|
99
|
+
### 4. 50% Token Compression
|
|
100
|
+
|
|
101
|
+
result = streamctx.compress(messages, max_tokens=2000)
|
|
102
|
+
# 140 tokens → 70 tokens (50% reduction)
|
|
103
|
+
|
|
104
|
+
### 5. Self-Healing
|
|
105
|
+
|
|
106
|
+
stats = streamctx.healing_stats()
|
|
107
|
+
# failures: 1, recoveries: 1
|
|
108
|
+
|
|
109
|
+
### 6. Full Session Report
|
|
110
|
+
|
|
111
|
+
streamctx.report()
|
|
112
|
+
streamctx.stop()
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Feature Comparison
|
|
117
|
+
|
|
118
|
+
Feature | StreamCtx | Langfuse | LangSmith | Mem0
|
|
119
|
+
---------------------|-----------|----------|-----------|-----
|
|
120
|
+
Token tracking | YES | YES | YES | NO
|
|
121
|
+
Cost estimation | YES | YES | YES | NO
|
|
122
|
+
Context Poison Det. | YES | NO | NO | NO
|
|
123
|
+
Context Diff | YES | NO | NO | NO
|
|
124
|
+
Auto-checkpoint | YES | NO | NO | NO
|
|
125
|
+
50% Compression | YES | NO | NO | NO
|
|
126
|
+
Self-healing | YES | NO | NO | NO
|
|
127
|
+
Zero config | YES | NO | NO | NO
|
|
128
|
+
Open source | YES | YES | NO | NO
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Quick Start
|
|
133
|
+
|
|
134
|
+
import streamctx
|
|
135
|
+
from openai import OpenAI
|
|
136
|
+
|
|
137
|
+
streamctx.start()
|
|
138
|
+
client = OpenAI()
|
|
139
|
+
|
|
140
|
+
messages = [{"role": "user", "content": "Hello!"}]
|
|
141
|
+
response = client.chat.completions.create(
|
|
142
|
+
model="gpt-4o-mini",
|
|
143
|
+
messages=messages,
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
result = streamctx.scan(messages)
|
|
147
|
+
print(result["health_score"])
|
|
148
|
+
print(result["recommendation"])
|
|
149
|
+
|
|
150
|
+
streamctx.report()
|
|
151
|
+
streamctx.stop()
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## API Reference
|
|
156
|
+
|
|
157
|
+
streamctx.start() # start tracking
|
|
158
|
+
streamctx.stop() # stop tracking
|
|
159
|
+
streamctx.report() # print full report
|
|
160
|
+
streamctx.wrap(client) # manually wrap client
|
|
161
|
+
|
|
162
|
+
streamctx.scan(messages) # context health score
|
|
163
|
+
streamctx.context_diff(a, b) # compare two steps
|
|
164
|
+
|
|
165
|
+
streamctx.checkpoint() # save checkpoint
|
|
166
|
+
streamctx.resume(session_id) # resume from checkpoint
|
|
167
|
+
streamctx.get_session_id() # current session ID
|
|
168
|
+
|
|
169
|
+
streamctx.compress(messages) # 50% token compression
|
|
170
|
+
streamctx.healing_stats() # self-healing stats
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Why StreamCtx?
|
|
175
|
+
|
|
176
|
+
Most tools answer: "How many tokens did I use?"
|
|
177
|
+
|
|
178
|
+
StreamCtx answers: "Why is my agent broken — and how do I fix it?"
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## Roadmap
|
|
183
|
+
|
|
184
|
+
DONE:
|
|
185
|
+
- Token tracking + cost estimation
|
|
186
|
+
- Context poison detection
|
|
187
|
+
- Context diff + drift scoring
|
|
188
|
+
- Auto-checkpoint + resume
|
|
189
|
+
- 50% token compression
|
|
190
|
+
- Self-healing engine
|
|
191
|
+
|
|
192
|
+
COMING:
|
|
193
|
+
- Context budget manager (v0.4.0)
|
|
194
|
+
- Visual dashboard
|
|
195
|
+
- Multi-agent support
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## License
|
|
200
|
+
|
|
201
|
+
MIT - Sneh R Joshi
|
|
202
|
+
|
|
203
|
+
Built by a solo founder who got tired of AI agents silently going insane.
|
|
204
|
+
|
|
205
|
+
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
src/streamctx.egg-info/PKG-INFO
|
|
5
|
+
src/streamctx.egg-info/SOURCES.txt
|
|
6
|
+
src/streamctx.egg-info/dependency_links.txt
|
|
7
|
+
src/streamctx.egg-info/requires.txt
|
|
8
|
+
src/streamctx.egg-info/top_level.txt
|
|
9
|
+
tests/test_checkpoint.py
|
|
10
|
+
tests/test_compression.py
|
|
11
|
+
tests/test_diff.py
|
|
12
|
+
tests/test_healing.py
|
|
13
|
+
tests/test_openai_integration.py
|
|
14
|
+
tests/test_poison.py
|
|
15
|
+
tests/test_streaming.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
|
|
2
|
+
"""Pytest version of StreamCtx checkpoint system test."""
|
|
3
|
+
|
|
4
|
+
import streamctx
|
|
5
|
+
from streamctx.tracker import get_tracker
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_checkpoint_and_resume():
|
|
9
|
+
"""Checkpoint should save state, and resume should restore exact messages."""
|
|
10
|
+
streamctx.start()
|
|
11
|
+
session_id = streamctx.get_session_id()
|
|
12
|
+
assert session_id is not None
|
|
13
|
+
|
|
14
|
+
tracker = get_tracker()
|
|
15
|
+
|
|
16
|
+
tracker.state._last_messages = [
|
|
17
|
+
{"role": "system", "content": "You are a helpful assistant."},
|
|
18
|
+
{"role": "user", "content": "What is Python?"},
|
|
19
|
+
]
|
|
20
|
+
tracker.state.step_counter = 1
|
|
21
|
+
streamctx.checkpoint()
|
|
22
|
+
|
|
23
|
+
tracker.state._last_messages = [
|
|
24
|
+
{"role": "system", "content": "You are a helpful assistant."},
|
|
25
|
+
{"role": "user", "content": "What is Python?"},
|
|
26
|
+
{"role": "assistant", "content": "Python is a programming language."},
|
|
27
|
+
{"role": "user", "content": "Tell me more."},
|
|
28
|
+
]
|
|
29
|
+
tracker.state.step_counter = 2
|
|
30
|
+
streamctx.checkpoint()
|
|
31
|
+
|
|
32
|
+
messages = streamctx.resume(session_id)
|
|
33
|
+
|
|
34
|
+
assert len(messages) == 4
|
|
35
|
+
assert messages[0]["role"] == "system"
|
|
36
|
+
assert messages[1]["content"] == "What is Python?"
|
|
37
|
+
assert messages[2]["content"] == "Python is a programming language."
|
|
38
|
+
assert messages[3]["content"] == "Tell me more."
|
|
39
|
+
|
|
40
|
+
streamctx.stop()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_resume_unknown_session_returns_empty_or_raises():
|
|
44
|
+
"""Resuming an unknown session ID should return an empty list or None, never fabricated messages."""
|
|
45
|
+
result = streamctx.resume(999999999)
|
|
46
|
+
assert result == [] or result is None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Pytest suite to verify Context Compression works."""
|
|
2
|
+
|
|
3
|
+
import streamctx
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
messages = [
|
|
7
|
+
{"role": "system", "content": "You are a helpful AI assistant for coding tasks."},
|
|
8
|
+
{"role": "user", "content": "What is Python? Tell me everything about it in detail."},
|
|
9
|
+
{
|
|
10
|
+
"role": "assistant",
|
|
11
|
+
"content": (
|
|
12
|
+
"Python is a high-level, interpreted programming language known for "
|
|
13
|
+
"its simplicity and readability. It was created by Guido van Rossum."
|
|
14
|
+
),
|
|
15
|
+
},
|
|
16
|
+
{"role": "user", "content": "What are Python libraries? Give me a comprehensive list."},
|
|
17
|
+
{
|
|
18
|
+
"role": "assistant",
|
|
19
|
+
"content": (
|
|
20
|
+
"Python has thousands of libraries. NumPy, Pandas, Matplotlib, "
|
|
21
|
+
"Scikit-learn, TensorFlow, PyTorch, Django, Flask, Requests and many more."
|
|
22
|
+
),
|
|
23
|
+
},
|
|
24
|
+
{"role": "user", "content": "Now tell me about neural networks."},
|
|
25
|
+
{
|
|
26
|
+
"role": "assistant",
|
|
27
|
+
"content": "Neural networks are computational models inspired by biological neural networks.",
|
|
28
|
+
},
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_compress_returns_expected_keys():
|
|
33
|
+
"""Compression result should include messages and a stats block."""
|
|
34
|
+
result = streamctx.compress(messages, max_tokens=100, keep_last_n=2)
|
|
35
|
+
|
|
36
|
+
assert "messages" in result
|
|
37
|
+
assert "stats" in result
|
|
38
|
+
|
|
39
|
+
stats = result["stats"]
|
|
40
|
+
assert "original_tokens" in stats
|
|
41
|
+
assert "compressed_tokens" in stats
|
|
42
|
+
assert "saved_tokens" in stats
|
|
43
|
+
assert "compression_pct" in stats
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_compress_reduces_token_count():
|
|
47
|
+
"""Compressed token count should be less than or equal to the original."""
|
|
48
|
+
result = streamctx.compress(messages, max_tokens=100, keep_last_n=2)
|
|
49
|
+
stats = result["stats"]
|
|
50
|
+
|
|
51
|
+
assert stats["compressed_tokens"] <= stats["original_tokens"]
|
|
52
|
+
assert stats["saved_tokens"] >= 0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_compress_keeps_last_n_messages():
|
|
56
|
+
"""Compression should keep at least keep_last_n messages and never return more messages than it was given."""
|
|
57
|
+
result = streamctx.compress(messages, max_tokens=100, keep_last_n=2)
|
|
58
|
+
|
|
59
|
+
assert len(result["messages"]) >= 2
|
|
60
|
+
assert len(result["messages"]) <= len(messages)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Pytest suite to verify Context Diff works."""
|
|
2
|
+
|
|
3
|
+
import streamctx
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Step 3 messages (earlier)
|
|
7
|
+
step3 = [
|
|
8
|
+
{"role": "system", "content": "You are a helpful assistant. Use Claude only."},
|
|
9
|
+
{"role": "user", "content": "What is Python?"},
|
|
10
|
+
{"role": "assistant", "content": "Python is a programming language."},
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
# Step 7 messages (later - things changed!)
|
|
14
|
+
step7 = [
|
|
15
|
+
{"role": "system", "content": "You are a helpful assistant. Use GPT-4 only."},
|
|
16
|
+
{"role": "user", "content": "What is Python?"},
|
|
17
|
+
{"role": "assistant", "content": "Python is a programming language."},
|
|
18
|
+
{"role": "user", "content": "Now explain JavaScript too."},
|
|
19
|
+
{"role": "assistant", "content": "JavaScript runs in the browser."},
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_diff_minor_change_has_low_drift():
|
|
24
|
+
"""Comparing a message set with itself should yield a drift score of exactly 0."""
|
|
25
|
+
diff = streamctx.context_diff(step3, step3, step_a=3, step_b=3)
|
|
26
|
+
|
|
27
|
+
assert "summary" in diff
|
|
28
|
+
assert "drift_score" in diff
|
|
29
|
+
assert diff["drift_score"] == 0
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_diff_significant_drift_detected():
|
|
33
|
+
"""Comparing step3 vs step7 should report a positive drift score and at least one added message, and expose the removed and token_delta fields."""
|
|
34
|
+
diff = streamctx.context_diff(step3, step7, step_a=3, step_b=7)
|
|
35
|
+
|
|
36
|
+
assert "summary" in diff
|
|
37
|
+
assert "drift_score" in diff
|
|
38
|
+
assert "added" in diff
|
|
39
|
+
assert "removed" in diff
|
|
40
|
+
assert "token_delta" in diff
|
|
41
|
+
|
|
42
|
+
# step7 has 2 more messages than step3
|
|
43
|
+
assert len(diff["added"]) >= 1
|
|
44
|
+
# drift score should be greater than the identical-message case
|
|
45
|
+
assert diff["drift_score"] > 0
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_diff_system_prompt_removed_triggers_warning():
    """A diff where the later step lacks the system prompt yields a warning."""
    # Earlier snapshot: starts with a system instruction.
    before = [
        {"role": "system", "content": "Always respond in English only."},
        {"role": "user", "content": "Hello"},
    ]
    # Later snapshot: the system message is gone and a new user turn appears.
    after = [
        {"role": "user", "content": "Hello"},
        {"role": "user", "content": "Now respond in French please."},
    ]

    result = streamctx.context_diff(before, after, step_a=1, step_b=5)

    for field in ("summary", "warnings"):
        assert field in result
    assert len(result["warnings"]) >= 1
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Pytest suite to verify StreamCtx self-healing works."""
|
|
2
|
+
|
|
3
|
+
import streamctx
|
|
4
|
+
from streamctx.healer import SelfHealingEngine
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Conversation recorded as the last known-good context in the healer tests.
valid_messages = [
    {"role": role, "content": content}
    for role, content in (
        ("system", "You are a helpful assistant."),
        ("user", "What is Python?"),
        ("assistant", "Python is a programming language."),
    )
]
|
|
12
|
+
|
|
13
|
+
# Same conversation plus one extra user turn; passed to the healer as the
# context to recover from. Built independently so no dict objects are shared
# with any other list.
failed_messages = [
    {"role": role, "content": content}
    for role, content in (
        ("system", "You are a helpful assistant."),
        ("user", "What is Python?"),
        ("assistant", "Python is a programming language."),
        ("user", "Tell me more about Python."),
    )
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FakeResponse:
    """Minimal stand-in for a model response, passed to ``record_success``."""

    # Fixed reply text exposed as a class attribute.
    content = "Python is a programming language."
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_healer_records_success_and_can_heal():
    """After one recorded success the engine reports that it can heal."""
    engine = SelfHealingEngine()
    engine.record_success(valid_messages, FakeResponse())

    able_to_heal = engine.can_heal()
    assert able_to_heal is True
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_healer_records_failure():
    """A recorded failure shows up in the engine's ``failure_count`` stat."""
    engine = SelfHealingEngine()
    engine.record_success(valid_messages, FakeResponse())
    engine.record_failure()

    failure_count = engine.get_stats()["failure_count"]
    assert failure_count >= 1
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_healer_generates_recovery_messages():
    """Recovery output is a non-empty list of role/content messages."""
    engine = SelfHealingEngine()
    engine.record_success(valid_messages, FakeResponse())
    engine.record_failure()

    recovered = engine.get_recovery_messages(failed_messages)

    assert isinstance(recovered, list)
    assert len(recovered) > 0
    # Every recovered entry must look like a chat message.
    assert all("role" in entry and "content" in entry for entry in recovered)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_healer_stats_structure():
    """``get_stats()`` exposes the three expected keys."""
    engine = SelfHealingEngine()
    engine.record_success(valid_messages, FakeResponse())
    engine.record_failure()

    report = engine.get_stats()
    for key in ("failure_count", "recovery_count", "has_valid_context"):
        assert key in report
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_streamctx_healing_stats_integration():
    """Full integration: ``healing_stats()`` returns a value between start() and stop()."""
    streamctx.start()
    try:
        h_stats = streamctx.healing_stats()
    finally:
        # Always stop, even if healing_stats() raises, so a failure here
        # cannot leave tracking switched on for tests that run afterwards.
        streamctx.stop()

    assert h_stats is not None
|
|
73
|
+
|
|
74
|
+
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Integration tests for streamctx with the OpenAI Python SDK."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import unittest
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
import streamctx
|
|
10
|
+
from openai import OpenAI
|
|
11
|
+
from streamctx.tracker import get_tracker
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _mock_openai_handler(request: httpx.Request) -> httpx.Response:
    """Return a canned chat-completion reply for any incoming request.

    ``request`` is ignored: every call gets the same 200 response carrying a
    single assistant message and a usage block totalling 24 tokens.
    """
    assistant_choice = {
        "index": 0,
        "message": {
            "role": "assistant",
            "content": "Hello from streamctx test!",
        },
        "finish_reason": "stop",
    }
    token_usage = {
        "prompt_tokens": 18,
        "completion_tokens": 6,
        "total_tokens": 24,
    }
    payload = {
        "id": "chatcmpl-test",
        "object": "chat.completion",
        "created": 0,
        "model": "gpt-4o-mini",
        "choices": [assistant_choice],
        "usage": token_usage,
    }
    return httpx.Response(200, json=payload)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _mock_client() -> OpenAI:
    """Build an OpenAI client whose HTTP traffic is served by the mock handler."""
    transport = httpx.MockTransport(_mock_openai_handler)
    http_client = httpx.Client(transport=transport)
    return OpenAI(api_key="sk-test", http_client=http_client)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class TestOpenAIIntegration(unittest.TestCase):
    """Exercise streamctx tracking against an OpenAI client with a mocked transport."""

    def tearDown(self) -> None:
        """Stop tracking and reset the shared tracker's state after each test."""
        streamctx.stop()
        tracker = get_tracker()
        state = tracker.state
        state.auto_reported = False
        state.call_count = 0
        state.active = False
        state.session_id = None
        state._wrapped_clients.clear()
        # Replace the diff component with a fresh instance of its own class.
        tracker.diff = tracker.diff.__class__()

    def test_start_tracks_openai_calls(self) -> None:
        """A call made after ``start()`` is counted with its tokens and cost."""
        streamctx.start()
        client = _mock_client()
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Say hello"},
        ]
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=conversation,
        )

        reply = response.choices[0].message.content
        self.assertEqual(reply, "Hello from streamctx test!")

        stats = get_tracker().get_stats()
        self.assertEqual(stats["call_count"], 1)
        self.assertEqual(stats["total_tokens"], 24)
        self.assertGreater(stats["total_cost"], 0)

    def test_wrap_without_double_counting(self) -> None:
        """Using ``start()`` together with ``wrap()`` counts each call once."""
        streamctx.start()
        client = streamctx.wrap(_mock_client())
        messages = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hi"},
        ]
        # Send the identical conversation twice.
        for _ in range(2):
            client.chat.completions.create(model="gpt-4o-mini", messages=messages)

        stats = get_tracker().get_stats()
        self.assertEqual(stats["call_count"], 2)
        self.assertEqual(stats["total_tokens"], 48)
        self.assertGreater(stats["reused_tokens"], 0)

    def test_wrap_only_tracks_client(self) -> None:
        """A wrapped client is tracked even though ``start()`` was never called."""
        wrapped = streamctx.wrap(_mock_client())
        wrapped.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "Hello"}],
        )

        stats = get_tracker().get_stats()
        self.assertEqual(stats["call_count"], 1)
        self.assertEqual(stats["total_tokens"], 24)

    def test_report_and_stop(self) -> None:
        """``report()`` followed by ``stop()`` leaves the tracker inactive."""
        streamctx.start()
        client = _mock_client()
        client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "Hello"}],
        )
        streamctx.report()
        streamctx.stop()

        tracker_state = get_tracker().state
        self.assertFalse(tracker_state.active)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@unittest.skipUnless(os.environ.get("OPENAI_API_KEY"), "OPENAI_API_KEY not set")
class TestOpenAILive(unittest.TestCase):
    """Live test against the real OpenAI API; skipped without OPENAI_API_KEY."""

    def tearDown(self) -> None:
        """Stop tracking after each test."""
        streamctx.stop()

    def test_real_openai_call(self) -> None:
        """One real completion is counted with non-zero tokens and cost."""
        streamctx.start()
        client = OpenAI()
        prompt = [{"role": "user", "content": "Reply with exactly: pong"}]
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=prompt,
            max_tokens=5,
        )

        # The content may be None, so fall back to an empty string.
        reply = response.choices[0].message.content or ""
        self.assertIn("pong", reply.lower())

        stats = get_tracker().get_stats()
        self.assertEqual(stats["call_count"], 1)
        for metric in ("total_tokens", "total_cost"):
            self.assertGreater(stats[metric], 0)
        streamctx.report()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Allow running this module directly; verbosity=2 prints each test name.
if __name__ == "__main__":
    unittest.main(verbosity=2)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Pytest suite to verify Context Poison Detector works."""
|
|
2
|
+
|
|
3
|
+
import streamctx
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_healthy_context_has_high_health_score():
    """A normal question/answer conversation is not reported as poisoned.

    Only the presence of ``health_score`` is checked; its value is not asserted.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is Python?"},
        {"role": "assistant", "content": "Python is a programming language."},
        {"role": "user", "content": "Tell me more."},
        {"role": "assistant", "content": "Python is used for web, data science, and AI."},
    ]

    report = streamctx.scan(conversation)

    for field in ("health_score", "is_poisoned", "recommendation"):
        assert field in report
    assert report["is_poisoned"] is False
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_agent_stuck_in_loop_is_flagged_as_poisoned():
    """Repeated retry/error exchanges are reported as poisoned, with warnings."""
    # Four consecutive retries, each answered with an error message.
    conversation = [
        {"role": "system", "content": "You are an API assistant."},
        {"role": "user", "content": "Call the payment API."},
        {"role": "assistant", "content": "Error: endpoint not found. Failed to connect."},
        {"role": "user", "content": "Try again."},
        {"role": "assistant", "content": "Error: endpoint not found. Failed again."},
        {"role": "user", "content": "Try once more."},
        {"role": "assistant", "content": "Error: invalid request. Failed to process."},
        {"role": "user", "content": "One more time."},
        {"role": "assistant", "content": "Error: cannot connect. Failed."},
    ]

    report = streamctx.scan(conversation)

    for field in ("health_score", "is_poisoned", "warnings"):
        assert field in report
    assert report["is_poisoned"] is True
    assert len(report["warnings"]) >= 1
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_contradictory_context_is_flagged():
    """Scanning a self-contradicting conversation returns the full report shape.

    Only the presence of the report keys is asserted; no verdict is checked.
    """
    # The system prompt says payments are enabled; the last reply says disabled.
    conversation = [
        {"role": "system", "content": "The payment feature is enabled and available."},
        {"role": "user", "content": "Process my payment."},
        {"role": "assistant", "content": "Payment processed successfully."},
        {"role": "user", "content": "Check status."},
        {"role": "assistant", "content": "Payment is unavailable. Feature is disabled."},
    ]

    report = streamctx.scan(conversation)

    required = ("health_score", "is_poisoned", "warnings", "recommendation")
    for field in required:
        assert field in report
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Pytest suite to verify real-time step streaming (SessionStorage + LLMTracker)."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from streamctx.storage import SessionStorage
|
|
6
|
+
from streamctx.tracker import get_tracker
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture
def storage(tmp_path):
    """Provide a new SessionStorage on a per-test temporary database file.

    The file lives under pytest's built-in ``tmp_path`` directory, so pytest
    takes care of cleanup (Windows-safe; avoids PermissionError on file
    handle release).
    """
    return SessionStorage(db_path=tmp_path / "test_sessions.db")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_start_and_end_session(storage):
    """Starting a session returns a positive integer id that can then be ended."""
    sid = storage.start_session()

    assert isinstance(sid, int)
    assert sid > 0

    # Ending the freshly created session must complete without raising.
    storage.end_session(sid)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_record_call_streams_to_db(storage):
    """A recorded LLM call is visible in the session stats right away."""
    sid = storage.start_session()

    call = {
        "session_id": sid,
        "provider": "openai",
        "model": "gpt-4",
        "input_tokens": 100,
        "output_tokens": 50,
        "cost": 0.01,
        "reused_tokens": 10,
        "waste_category": None,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    storage.record_call(**call)

    stats = storage.get_session_stats(sid)

    expected = {
        "call_count": 1,
        "input_tokens": 100,
        "output_tokens": 50,
        "total_tokens": 150,
        "reused_tokens": 10,
    }
    for key, value in expected.items():
        assert stats[key] == value
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_record_call_accumulates_across_multiple_calls(storage):
    """Three identical calls add up in the session's counts, tokens and cost."""
    sid = storage.start_session()

    repeats = 3
    for _ in range(repeats):
        storage.record_call(
            session_id=sid,
            provider="openai",
            model="gpt-4",
            input_tokens=100,
            output_tokens=50,
            cost=0.01,
            reused_tokens=0,
            waste_category=None,
            messages=[{"role": "user", "content": "Hi"}],
        )

    stats = storage.get_session_stats(sid)

    assert stats["call_count"] == 3
    assert stats["input_tokens"] == 300
    assert stats["output_tokens"] == 150
    # Cost is a float sum, so compare within a small absolute tolerance.
    cost_error = abs(stats["total_cost"] - 0.03)
    assert cost_error < 1e-9
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_save_checkpoint_per_step(storage):
    """After saving checkpoints for steps 1 and 2, the latest one is step 2."""
    sid = storage.start_session()
    first_snapshot = [{"role": "user", "content": "step 1"}]
    second_snapshot = [
        {"role": "user", "content": "step 1"},
        {"role": "assistant", "content": "response 1"},
        {"role": "user", "content": "step 2"},
    ]

    # Save the snapshots in order, numbering the steps from 1.
    for step, snapshot in enumerate((first_snapshot, second_snapshot), start=1):
        storage.save_checkpoint(sid, step_number=step, messages=snapshot)

    newest = storage.get_latest_checkpoint(sid)

    assert newest is not None
    assert newest["step_number"] == 2
    assert newest["messages"] == second_snapshot
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_resume_from_checkpoint_returns_latest_messages(storage):
    """Resuming a session gives back the messages stored in its checkpoint."""
    sid = storage.start_session()
    saved = [{"role": "user", "content": "resume me"}]
    storage.save_checkpoint(sid, step_number=1, messages=saved)

    restored = storage.resume_from_checkpoint(sid)

    assert restored == saved
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_resume_from_checkpoint_no_checkpoint_returns_empty(storage):
    """Resuming a session with no checkpoint gives an empty list."""
    sid = storage.start_session()

    restored = storage.resume_from_checkpoint(sid)

    assert restored == []
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def test_get_session_stats_tracks_biggest_waste(storage):
    """Two calls with the same waste category make it the ``biggest_waste``."""
    sid = storage.start_session()

    # Record the same call twice; a fresh messages list is built each time.
    for _ in range(2):
        storage.record_call(
            session_id=sid,
            provider="openai",
            model="gpt-4",
            input_tokens=50,
            output_tokens=20,
            cost=0.005,
            reused_tokens=5,
            waste_category="repeated system prompt",
            messages=[{"role": "system", "content": "You are helpful."}],
        )

    stats = storage.get_session_stats(sid)

    assert stats["biggest_waste"] == "repeated system prompt"
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_tracker_start_stop_creates_and_ends_session():
    """Full integration: start() yields a session id and stop() deactivates the tracker."""
    tracker = get_tracker()

    tracker.start()
    try:
        session_id = tracker.get_session_id()
        assert session_id is not None
    finally:
        # get_tracker() hands out a shared tracker, so always stop it: a failed
        # assertion above must not leave it running for later tests.
        tracker.stop()

    # After stop, the tracker must report itself as inactive.
    assert tracker.state.active is False
|