testthread 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
testthread/__init__.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
|
|
3
|
+
class TestThread:
    """Thin HTTP client for the TestThread REST API.

    Each public method issues exactly one HTTP request through the
    ``requests`` library and returns the response body decoded with
    ``Response.json()``. HTTP status codes are not inspected, so an error
    response that carries a JSON body is returned to the caller as-is.

    Args:
        base_url: Root URL of the TestThread server. Trailing slashes are
            removed so endpoint paths can be appended with a single ``/``.
        timeout: Forwarded unchanged as ``timeout=`` to every ``requests``
            call (seconds, or a ``(connect, read)`` tuple). The default
            ``None`` preserves the historical behaviour of waiting without a
            limit; pass a finite value so a stalled server cannot block the
            caller forever.
    """

    # The class name starts with "Test", so pytest would otherwise try to
    # collect it (and warn about its __init__) whenever it is imported into a
    # test module. It is a client, not a test case.
    __test__ = False

    def __init__(self, base_url="https://test-thread-production.up.railway.app", timeout=None):
        self.base = base_url.rstrip("/")
        self.timeout = timeout

    def create_suite(self, name, agent_endpoint, description=None):
        """Create a test suite via ``POST /suites`` and return the decoded JSON reply.

        ``description`` is sent even when it is ``None`` (serialized as JSON
        ``null``).
        """
        res = requests.post(
            f"{self.base}/suites",
            json={
                "name": name,
                "description": description,
                "agent_endpoint": agent_endpoint,
            },
            timeout=self.timeout,
        )
        return res.json()

    # NOTE: ``input`` shadows the builtin, but it is part of the public keyword
    # interface, so the parameter name is kept for backward compatibility.
    def add_case(self, suite_id, name, input, expected_output, match_type="contains", description=None):
        """Add a test case via ``POST /suites/{suite_id}/cases`` and return the decoded JSON reply.

        ``match_type`` defaults to ``"contains"``; the project README lists
        ``contains``, ``exact`` and ``regex``. The value is passed through to
        the server without local validation.
        """
        res = requests.post(
            f"{self.base}/suites/{suite_id}/cases",
            json={
                "name": name,
                "description": description,
                "input": input,
                "expected_output": expected_output,
                "match_type": match_type,
            },
            timeout=self.timeout,
        )
        return res.json()

    def run_suite(self, suite_id):
        """Trigger a run via ``POST /suites/{suite_id}/run`` (no request body) and return the decoded JSON reply."""
        res = requests.post(f"{self.base}/suites/{suite_id}/run", timeout=self.timeout)
        return res.json()

    def get_run(self, run_id):
        """Fetch one run via ``GET /runs/{run_id}`` and return the decoded JSON reply."""
        res = requests.get(f"{self.base}/runs/{run_id}", timeout=self.timeout)
        return res.json()

    def list_suites(self):
        """Fetch ``GET /suites`` and return the decoded JSON reply."""
        res = requests.get(f"{self.base}/suites", timeout=self.timeout)
        return res.json()

    def list_runs(self):
        """Fetch ``GET /runs`` and return the decoded JSON reply."""
        res = requests.get(f"{self.base}/runs", timeout=self.timeout)
        return res.json()

    def stats(self):
        """Fetch ``GET /dashboard/stats`` and return the decoded JSON reply."""
        res = requests.get(f"{self.base}/dashboard/stats", timeout=self.timeout)
        return res.json()
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: testthread
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: pytest for AI agents
|
|
5
|
+
Home-page: https://github.com/eugene001dayne/test-thread
|
|
6
|
+
Author: Eugene Dayne Mawuli
|
|
7
|
+
Author-email:
|
|
8
|
+
License: Apache 2.0
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: requests
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: classifier
|
|
16
|
+
Dynamic: description
|
|
17
|
+
Dynamic: description-content-type
|
|
18
|
+
Dynamic: home-page
|
|
19
|
+
Dynamic: license
|
|
20
|
+
Dynamic: requires-dist
|
|
21
|
+
Dynamic: requires-python
|
|
22
|
+
Dynamic: summary
|
|
23
|
+
|
|
24
|
+
# TestThread 🧵
|
|
25
|
+
|
|
26
|
+
**pytest for AI agents.**
|
|
27
|
+
|
|
28
|
+
The open-source testing framework that tells you if your AI agent is actually working — or quietly breaking.
|
|
29
|
+
|
|
30
|
+
[License: Apache 2.0](LICENSE)
|
|
31
|
+
[Live API](https://test-thread-production.up.railway.app)
|
|
32
|
+
[Dashboard](https://test-thread.lovable.app)
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## The Problem
|
|
37
|
+
|
|
38
|
+
You build an AI agent. It works in testing. You ship it.
|
|
39
|
+
|
|
40
|
+
Then it starts hallucinating. Returning wrong formats. Calling the wrong tools. Breaking your pipeline.
|
|
41
|
+
|
|
42
|
+
You find out when something downstream crashes — not before.
|
|
43
|
+
|
|
44
|
+
**TestThread fixes that.**
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## What TestThread Does
|
|
49
|
+
|
|
50
|
+
Define what your agent *should* do. TestThread runs it, checks the output, and tells you exactly what passed and what failed.
|
|
51
|
+
|
|
52
|
+
- ✅ Define test suites per agent
|
|
53
|
+
- ✅ Add test cases with expected outputs
|
|
54
|
+
- ✅ Run suites against your live agent endpoint
|
|
55
|
+
- ✅ Get pass/fail results with reasons
|
|
56
|
+
- ✅ Track pass rate over time
|
|
57
|
+
- ✅ Catch regressions before they hit production
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## Quick Start
|
|
62
|
+
```bash
|
|
63
|
+
pip install requests
|
|
64
|
+
```
|
|
65
|
+
```python
|
|
66
|
+
import requests
|
|
67
|
+
|
|
68
|
+
BASE = "https://test-thread-production.up.railway.app"
|
|
69
|
+
|
|
70
|
+
# Create a test suite
|
|
71
|
+
suite = requests.post(f"{BASE}/suites", json={
|
|
72
|
+
"name": "My Agent Tests",
|
|
73
|
+
"description": "Testing my AI agent",
|
|
74
|
+
"agent_endpoint": "https://your-agent.com/run"
|
|
75
|
+
}).json()
|
|
76
|
+
|
|
77
|
+
# Add a test case
|
|
78
|
+
requests.post(f"{BASE}/suites/{suite['id']}/cases", json={
|
|
79
|
+
"name": "Basic response check",
|
|
80
|
+
"input": "What is 2 + 2?",
|
|
81
|
+
"expected_output": "4",
|
|
82
|
+
"match_type": "contains"
|
|
83
|
+
})
|
|
84
|
+
|
|
85
|
+
# Run the suite
|
|
86
|
+
result = requests.post(f"{BASE}/suites/{suite['id']}/run").json()
|
|
87
|
+
print(f"Passed: {result['passed']} | Failed: {result['failed']}")
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Match Types
|
|
93
|
+
|
|
94
|
+
| Type | Description |
|
|
95
|
+
|------|-------------|
|
|
96
|
+
| `contains` | Output contains the expected string |
|
|
97
|
+
| `exact` | Output matches exactly |
|
|
98
|
+
| `regex` | Output matches a regex pattern |
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Live Dashboard
|
|
103
|
+
|
|
104
|
+
View your test results visually at **[test-thread.lovable.app](https://test-thread.lovable.app)**
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## API Reference
|
|
109
|
+
|
|
110
|
+
Full docs at **[test-thread-production.up.railway.app/docs](https://test-thread-production.up.railway.app/docs)**
|
|
111
|
+
|
|
112
|
+
| Method | Endpoint | Description |
|
|
113
|
+
|--------|----------|-------------|
|
|
114
|
+
| GET | `/` | Health check |
|
|
115
|
+
| POST | `/suites` | Create test suite |
|
|
116
|
+
| GET | `/suites` | List all suites |
|
|
117
|
+
| POST | `/suites/{id}/cases` | Add test case |
|
|
118
|
+
| GET | `/suites/{id}/cases` | List test cases |
|
|
119
|
+
| POST | `/suites/{id}/run` | Run suite |
|
|
120
|
+
| GET | `/runs` | List all runs |
|
|
121
|
+
| GET | `/runs/{id}` | Get run details |
|
|
122
|
+
| GET | `/dashboard/stats` | Dashboard stats |
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Part of the Thread Suite
|
|
127
|
+
|
|
128
|
+
TestThread is part of a suite of open-source reliability tools for AI agents.
|
|
129
|
+
|
|
130
|
+
| Tool | What it does |
|
|
131
|
+
|------|-------------|
|
|
132
|
+
| [Iron-Thread](https://github.com/eugene001dayne/iron-thread) | Validates AI output structure before it hits your database |
|
|
133
|
+
| **TestThread** | Tests whether your agent behaves correctly across runs |
|
|
134
|
+
| PromptThread *(coming soon)* | Versions and tracks prompt performance over time |
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Self-Host
|
|
139
|
+
```bash
|
|
140
|
+
git clone https://github.com/eugene001dayne/test-thread.git
|
|
141
|
+
cd test-thread
|
|
142
|
+
pip install -r requirements.txt
|
|
143
|
+
uvicorn main:app --reload
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## License
|
|
149
|
+
|
|
150
|
+
Apache 2.0 — free to use, modify, and distribute.
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
Built for developers who ship AI agents and need to know they work.
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
testthread/__init__.py,sha256=PMEBdWN48NHA5IfwNeD12XnlbDIxcaS0_biSq6Ze-zQ,1425
|
|
2
|
+
testthread-0.1.0.dist-info/METADATA,sha256=dndYCZQ-dmGf2-IfSWRKU3Gvh6uCPZvRBei295QAq3s,4284
|
|
3
|
+
testthread-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
4
|
+
testthread-0.1.0.dist-info/top_level.txt,sha256=iBsCbWZt8eVw__XoEkwaf0n708-1cSe7R6mAQtmw1Ms,11
|
|
5
|
+
testthread-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
testthread
|