traceforge-0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. traceforge-0.2.0/LICENSE +21 -0
  2. traceforge-0.2.0/PKG-INFO +236 -0
  3. traceforge-0.2.0/README.md +204 -0
  4. traceforge-0.2.0/pyproject.toml +51 -0
  5. traceforge-0.2.0/setup.cfg +4 -0
  6. traceforge-0.2.0/src/traceforge/__init__.py +3 -0
  7. traceforge-0.2.0/src/traceforge/attribution.py +358 -0
  8. traceforge-0.2.0/src/traceforge/cli.py +588 -0
  9. traceforge-0.2.0/src/traceforge/evaluator.py +338 -0
  10. traceforge-0.2.0/src/traceforge/fuzzer.py +132 -0
  11. traceforge-0.2.0/src/traceforge/harness.py +228 -0
  12. traceforge-0.2.0/src/traceforge/history.py +110 -0
  13. traceforge-0.2.0/src/traceforge/html_report.py +245 -0
  14. traceforge-0.2.0/src/traceforge/invariants.py +477 -0
  15. traceforge-0.2.0/src/traceforge/judge.py +85 -0
  16. traceforge-0.2.0/src/traceforge/loader.py +97 -0
  17. traceforge-0.2.0/src/traceforge/minrepro.py +107 -0
  18. traceforge-0.2.0/src/traceforge/mock_tools.py +27 -0
  19. traceforge-0.2.0/src/traceforge/models.py +354 -0
  20. traceforge-0.2.0/src/traceforge/mutators.py +140 -0
  21. traceforge-0.2.0/src/traceforge/replay.py +92 -0
  22. traceforge-0.2.0/src/traceforge/reporter.py +220 -0
  23. traceforge-0.2.0/src/traceforge/trace_ir.py +32 -0
  24. traceforge-0.2.0/src/traceforge/trace_store.py +132 -0
  25. traceforge-0.2.0/src/traceforge/utils.py +15 -0
  26. traceforge-0.2.0/src/traceforge.egg-info/PKG-INFO +236 -0
  27. traceforge-0.2.0/src/traceforge.egg-info/SOURCES.txt +45 -0
  28. traceforge-0.2.0/src/traceforge.egg-info/dependency_links.txt +1 -0
  29. traceforge-0.2.0/src/traceforge.egg-info/entry_points.txt +2 -0
  30. traceforge-0.2.0/src/traceforge.egg-info/requires.txt +13 -0
  31. traceforge-0.2.0/src/traceforge.egg-info/top_level.txt +1 -0
  32. traceforge-0.2.0/tests/test_attribution.py +336 -0
  33. traceforge-0.2.0/tests/test_evaluator.py +282 -0
  34. traceforge-0.2.0/tests/test_fuzzer.py +168 -0
  35. traceforge-0.2.0/tests/test_harness.py +184 -0
  36. traceforge-0.2.0/tests/test_history.py +104 -0
  37. traceforge-0.2.0/tests/test_invariants.py +352 -0
  38. traceforge-0.2.0/tests/test_judge.py +96 -0
  39. traceforge-0.2.0/tests/test_loader.py +181 -0
  40. traceforge-0.2.0/tests/test_minrepro.py +249 -0
  41. traceforge-0.2.0/tests/test_mock_tools.py +56 -0
  42. traceforge-0.2.0/tests/test_models.py +290 -0
  43. traceforge-0.2.0/tests/test_mutators.py +145 -0
  44. traceforge-0.2.0/tests/test_replay.py +185 -0
  45. traceforge-0.2.0/tests/test_reporter.py +143 -0
  46. traceforge-0.2.0/tests/test_trace_ir.py +126 -0
  47. traceforge-0.2.0/tests/test_trace_store.py +130 -0
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 Abhimanyu Bhagwati
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,236 @@
+ Metadata-Version: 2.4
+ Name: traceforge
+ Version: 0.2.0
+ Summary: Test harness for AI agents that call tools. Record, replay, fuzz, and debug.
+ Author-email: Abhimanyu Bhagwati <abhimanyu@vt.edu>
+ License: MIT
+ Project-URL: Homepage, https://github.com/AbhimanyuBhagwati/TraceForge
+ Project-URL: Repository, https://github.com/AbhimanyuBhagwati/TraceForge
+ Project-URL: Issues, https://github.com/AbhimanyuBhagwati/TraceForge/issues
+ Keywords: ai,agents,testing,llm,ollama,evaluation,fuzzing,replay,debugging
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Topic :: Software Development :: Testing
+ Classifier: Topic :: Software Development :: Debuggers
+ Requires-Python: >=3.12
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Provides-Extra: dev
+ Requires-Dist: pytest>=8.0; extra == "dev"
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
+ Provides-Extra: dependencies
+ Requires-Dist: click>=8.0; extra == "dependencies"
+ Requires-Dist: ollama>=0.1.6; extra == "dependencies"
+ Requires-Dist: pydantic>=2.0; extra == "dependencies"
+ Requires-Dist: pyyaml>=6.0; extra == "dependencies"
+ Requires-Dist: rich>=13.0; extra == "dependencies"
+ Requires-Dist: jinja2>=3.0; extra == "dependencies"
+ Dynamic: license-file
+
+ # TraceForge
+
+ A test harness for AI agents that call tools.
+
+ If you're building agents with tool-calling (on Ollama, local models, etc.) and you're tired of staring at logs trying to figure out why your agent called the wrong tool or returned garbage — this is for you.
+
+ ## What it does
+
+ You write a YAML file describing what your agent should do. TraceForge runs it, records everything, and then lets you analyze the recordings without re-running the model.
+
+ ```yaml
+ name: calculator_agent
+ agent:
+   model: qwen2.5:7b-instruct
+   system_prompt: "You are a calculator assistant."
+ tools:
+   - name: calculate
+     description: "Perform a math calculation"
+     parameters:
+       type: object
+       properties:
+         expression: { type: string }
+       required: [expression]
+     mock_responses: [{ result: 42 }]
+
+ steps:
+   - user_message: "What is 6 times 7?"
+     expectations:
+       - type: tool_called
+         tool: calculate
+       - type: response_contains
+         values: ["42"]
+ ```
+
+ ```
+ $ traceforge run ./scenarios/ --runs 10
+
+ ╭───────────────────── TraceForge Report ──────────────────────╮
+ │ SCENARIO PASS FAIL RATE CONSIST AVG MS │
+ │ OK calculator_agent 10/10 0/10 100% 1.00 1,059 │
+ │ XX multi_step_math 0/10 10/10 0% 1.00 3,598 │
+ │ OK simple_chat 10/10 0/10 100% 1.00 898 │
+ │ OK weather_agent 10/10 0/10 100% 1.00 1,246 │
+ │ │
+ │ OVERALL: 75.0% pass rate │
+ ╰──────────────────────────────────────────────────────────────╯
+ ```
+
+ ## The idea
+
+ Running an LLM is expensive and slow. But once you have a recording of what it did, you can re-evaluate it instantly, fuzz it, minimize it, and analyze it — all offline.
+
+ TraceForge records every agent run as an immutable, content-addressed trace (SHA-256 hashed). Then it gives you tools to work with those traces:
+
+ - **Replay** — re-evaluate a trace with different expectations, no model needed
+ - **Fuzz** — mutate tool responses (nulls, type swaps, empty strings) and see what breaks your agent
+ - **MinRepro** — your agent runs 4 steps and fails; delta debugging finds the 1 step that actually matters
+ - **Mine** — automatically discover behavioral rules from passing traces ("calculate is always called at step 0", "expression is always non-empty")
+ - **Attribute** — when something fails, run counterfactual experiments to find out why ("the agent is sensitive to tool output values, not format")
+
+ ## Install
+
+ ```bash
+ pip install traceforge
+ ```
+
+ Or from source:
+
+ ```bash
+ git clone https://github.com/AbhimanyuBhagwati/TraceForge.git
+ cd TraceForge
+ pip install -e ".[dev]"
+ ```
+
+ You'll need [Ollama](https://ollama.com/) running locally with a model pulled:
+
+ ```bash
+ ollama pull qwen2.5:7b-instruct
+ ```
+
+ ## Quick start
+
+ ```bash
+ # Create example scenarios
+ traceforge init
+
+ # Run them
+ traceforge run ./examples/scenarios/ --runs 5
+
+ # See what you've got
+ traceforge traces
+ traceforge info
+
+ # Replay a trace offline (no model call)
+ traceforge replay <trace-id>
+
+ # Fuzz tool responses
+ traceforge fuzz ./examples/scenarios/
+
+ # Find minimal failing case
+ traceforge minrepro <failing-trace-id> --scenario ./examples/scenarios/
+
+ # Discover behavioral patterns
+ traceforge mine calculator_agent -v
+
+ # Find root cause of failure
+ traceforge attribute <failing-trace-id> --scenario ./examples/scenarios/
+ ```
+
+ ## How it works
+
+ ```
+ YAML scenario
+      |
+      v
+ traceforge run        ->  traces (content-addressed, stored locally)
+      |
+      v
+ traceforge replay     ->  re-evaluate offline
+ traceforge fuzz       ->  break tool responses, find fragility
+ traceforge minrepro   ->  shrink failing trace to minimal case
+ traceforge mine       ->  discover behavioral rules from traces
+ traceforge attribute  ->  counterfactual analysis of failures
+      |
+      v
+ CLI output / HTML report / JSON export
+ ```
+
+ Everything after `run` works on stored traces. Run the model once, analyze as many times as you want.
+
+ ## Expectations
+
+ 10 built-in expectation types you can use in your YAML:
+
+ | Type | What it checks |
+ |------|---------------|
+ | `tool_called` | Agent called this tool |
+ | `tool_not_called` | Agent didn't call this tool |
+ | `tool_args_contain` | Tool was called with these arguments |
+ | `response_contains` | Agent's response includes these strings |
+ | `response_not_contains` | Agent's response doesn't include these strings |
+ | `response_matches_regex` | Response matches a regex |
+ | `llm_judge` | Another LLM evaluates the response |
+ | `latency_under` | Step completed within N ms |
+ | `no_tool_errors` | No tool calls returned errors |
+ | `tool_call_count` | Tool was called exactly/at least/at most N times |
+
+ ## Invariant mining
+
+ Instead of writing expectations by hand, let TraceForge figure them out:
+
+ ```bash
+ $ traceforge mine calculator_agent -v
+
+ ╭────────────── Invariant Mining Report ───────────────╮
+ │ Traces analyzed: 15 (15 passing, 0 failing) │
+ │ Invariants discovered: 5 │
+ │ │
+ │ - 'calculate' is always called at step 0 │
+ │ - 'calculate' is called 1-5 times per run │
+ │ - 'calculate.expression' is always non-empty │
+ │ - Step 0 response length is 30-48 chars │
+ │ - Step 0 latency is under 3916ms │
+ ╰──────────────────────────────────────────────────────╯
+ ```
+
+ Run enough traces and the miner will find rules that hold in all passing traces but break in failing ones. Those are your bugs.
+
+ ## Causal attribution
+
+ When a trace fails, TraceForge can run counterfactual experiments — change one thing at a time, re-run the agent, and see what flips the outcome.
+
+ ```bash
+ $ traceforge attribute <trace-id> --scenario ./scenarios/
+
+ ╭────────────── Causal Attribution Report ─────────────╮
+ │ Failing step: 2 | Interventions: 23 | Flips: 7 │
+ │ │
+ │ CAUSAL FACTOR SENSITIVITY │
+ │ tool_output_value 40% │
+ │ tool_output_format 0% │
+ │ system_prompt_clause 0% │
+ ╰──────────────────────────────────────────────────────╯
+ ```
+
+ "40% of value changes flipped the outcome. Format and prompt don't matter." Now you know where to look.
+
+ ## Requirements
+
+ - Python 3.12+
+ - Ollama running locally
+ - A pulled model (tested with `qwen2.5:7b-instruct`)
+
+ ## Tests
+
+ ```bash
+ pytest tests/ -v
+ ```
+
+ 183 tests; they run in about a second.
+
+ ## License
+
+ MIT
@@ -0,0 +1,204 @@
+ # TraceForge
+
+ A test harness for AI agents that call tools.
+
+ If you're building agents with tool-calling (on Ollama, local models, etc.) and you're tired of staring at logs trying to figure out why your agent called the wrong tool or returned garbage — this is for you.
+
+ ## What it does
+
+ You write a YAML file describing what your agent should do. TraceForge runs it, records everything, and then lets you analyze the recordings without re-running the model.
+
+ ```yaml
+ name: calculator_agent
+ agent:
+   model: qwen2.5:7b-instruct
+   system_prompt: "You are a calculator assistant."
+ tools:
+   - name: calculate
+     description: "Perform a math calculation"
+     parameters:
+       type: object
+       properties:
+         expression: { type: string }
+       required: [expression]
+     mock_responses: [{ result: 42 }]
+
+ steps:
+   - user_message: "What is 6 times 7?"
+     expectations:
+       - type: tool_called
+         tool: calculate
+       - type: response_contains
+         values: ["42"]
+ ```
+
+ ```
+ $ traceforge run ./scenarios/ --runs 10
+
+ ╭───────────────────── TraceForge Report ──────────────────────╮
+ │ SCENARIO PASS FAIL RATE CONSIST AVG MS │
+ │ OK calculator_agent 10/10 0/10 100% 1.00 1,059 │
+ │ XX multi_step_math 0/10 10/10 0% 1.00 3,598 │
+ │ OK simple_chat 10/10 0/10 100% 1.00 898 │
+ │ OK weather_agent 10/10 0/10 100% 1.00 1,246 │
+ │ │
+ │ OVERALL: 75.0% pass rate │
+ ╰──────────────────────────────────────────────────────────────╯
+ ```
+
+ ## The idea
+
+ Running an LLM is expensive and slow. But once you have a recording of what it did, you can re-evaluate it instantly, fuzz it, minimize it, and analyze it — all offline.
+
+ TraceForge records every agent run as an immutable, content-addressed trace (SHA-256 hashed). Then it gives you tools to work with those traces:
+
+ - **Replay** — re-evaluate a trace with different expectations, no model needed
+ - **Fuzz** — mutate tool responses (nulls, type swaps, empty strings) and see what breaks your agent
+ - **MinRepro** — your agent runs 4 steps and fails; delta debugging finds the 1 step that actually matters
+ - **Mine** — automatically discover behavioral rules from passing traces ("calculate is always called at step 0", "expression is always non-empty")
+ - **Attribute** — when something fails, run counterfactual experiments to find out why ("the agent is sensitive to tool output values, not format")
+
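+ A minimal sketch of the content-addressing idea: hash the canonical JSON of a recorded run to get a stable trace ID. This illustrates the concept only; it is not necessarily the exact scheme in `trace_store.py`, and the trace fields shown are made up.
+
+ ```python
+ import hashlib
+ import json
+
+ def trace_id(trace: dict) -> str:
+     """Derive a stable ID from trace content (illustrative helper, not the real API).
+
+     Serializing with sorted keys makes the hash independent of dict ordering,
+     so the same recorded behavior always maps to the same ID.
+     """
+     canonical = json.dumps(trace, sort_keys=True, separators=(",", ":"))
+     return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
+
+ trace = {
+     "scenario": "calculator_agent",
+     "steps": [
+         {"user_message": "What is 6 times 7?",
+          "tool_calls": [{"tool": "calculate", "args": {"expression": "6*7"}}],
+          "response": "6 times 7 is 42."},
+     ],
+ }
+ print(trace_id(trace)[:12])  # short prefix, like a git object ID
+ ```
+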
+ ## Install
+
+ ```bash
+ pip install traceforge
+ ```
+
+ Or from source:
+
+ ```bash
+ git clone https://github.com/AbhimanyuBhagwati/TraceForge.git
+ cd TraceForge
+ pip install -e ".[dev]"
+ ```
+
+ You'll need [Ollama](https://ollama.com/) running locally with a model pulled:
+
+ ```bash
+ ollama pull qwen2.5:7b-instruct
+ ```
+
+ ## Quick start
+
+ ```bash
+ # Create example scenarios
+ traceforge init
+
+ # Run them
+ traceforge run ./examples/scenarios/ --runs 5
+
+ # See what you've got
+ traceforge traces
+ traceforge info
+
+ # Replay a trace offline (no model call)
+ traceforge replay <trace-id>
+
+ # Fuzz tool responses
+ traceforge fuzz ./examples/scenarios/
+
+ # Find minimal failing case
+ traceforge minrepro <failing-trace-id> --scenario ./examples/scenarios/
+
+ # Discover behavioral patterns
+ traceforge mine calculator_agent -v
+
+ # Find root cause of failure
+ traceforge attribute <failing-trace-id> --scenario ./examples/scenarios/
+ ```
+
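+ The `fuzz` command above mutates recorded tool responses. Here is a rough sketch of the mutation kinds mentioned earlier (nulls, type swaps, empty strings); it is not the actual `mutators.py` code, just the idea:
+
+ ```python
+ import copy
+ import random
+
+ def mutate(response: dict, rng: random.Random) -> dict:
+     # Pick one field of a recorded tool response and damage it in one of
+     # three ways, so you can see whether the agent copes with bad tool output.
+     mutated = copy.deepcopy(response)
+     key = rng.choice(list(mutated))
+     kind = rng.choice(["null", "type_swap", "empty_string"])
+     if kind == "null":
+         mutated[key] = None               # value disappears entirely
+     elif kind == "type_swap":
+         mutated[key] = str(mutated[key])  # e.g. 42 -> "42"
+     else:
+         mutated[key] = ""                 # empty string where data was expected
+     return mutated
+
+ rng = random.Random(0)
+ print(mutate({"result": 42}, rng))  # e.g. {'result': '42'} or {'result': None}
+ ```
+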
+ ## How it works
+
+ ```
+ YAML scenario
+      |
+      v
+ traceforge run        ->  traces (content-addressed, stored locally)
+      |
+      v
+ traceforge replay     ->  re-evaluate offline
+ traceforge fuzz       ->  break tool responses, find fragility
+ traceforge minrepro   ->  shrink failing trace to minimal case
+ traceforge mine       ->  discover behavioral rules from traces
+ traceforge attribute  ->  counterfactual analysis of failures
+      |
+      v
+ CLI output / HTML report / JSON export
+ ```
+
+ Everything after `run` works on stored traces. Run the model once, analyze as many times as you want.
+
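+ The `minrepro` stage in the pipeline above relies on delta debugging. A simplified sketch of that reduction loop, assuming a hypothetical `still_fails` predicate that replays a subset of steps and reports whether the failure persists (the real algorithm in `minrepro.py` may differ):
+
+ ```python
+ from typing import Callable, Sequence
+
+ def minimize(steps: Sequence[dict], still_fails: Callable[[Sequence[dict]], bool]) -> list[dict]:
+     """Greedy 1-minimal reduction: drop any step whose removal keeps the failure.
+
+     Classic ddmin removes chunks first; this sketch removes one step at a time
+     for clarity. `still_fails` is a stand-in for "replay and re-check expectations".
+     """
+     current = list(steps)
+     changed = True
+     while changed:
+         changed = False
+         for i in range(len(current)):
+             candidate = current[:i] + current[i + 1:]
+             if candidate and still_fails(candidate):
+                 current = candidate   # the failure survives without step i, so drop it
+                 changed = True
+                 break
+     return current
+
+ # Toy example: only the division-by-zero step matters for the failure.
+ steps = [{"msg": "add 2+2"}, {"msg": "divide 1 by 0"}, {"msg": "say hi"}, {"msg": "multiply 6*7"}]
+ fails = lambda s: any("divide" in step["msg"] for step in s)
+ print(minimize(steps, fails))  # [{'msg': 'divide 1 by 0'}]
+ ```
+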
+ ## Expectations
+
+ 10 built-in expectation types you can use in your YAML:
+
+ | Type | What it checks |
+ |------|---------------|
+ | `tool_called` | Agent called this tool |
+ | `tool_not_called` | Agent didn't call this tool |
+ | `tool_args_contain` | Tool was called with these arguments |
+ | `response_contains` | Agent's response includes these strings |
+ | `response_not_contains` | Agent's response doesn't include these strings |
+ | `response_matches_regex` | Response matches a regex |
+ | `llm_judge` | Another LLM evaluates the response |
+ | `latency_under` | Step completed within N ms |
+ | `no_tool_errors` | No tool calls returned errors |
+ | `tool_call_count` | Tool was called exactly/at least/at most N times |
+
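+ Because every run is recorded, these checks are plain functions over the stored trace, which is what lets `replay` re-evaluate without a model. A rough sketch of two of them against a made-up recorded step (field names are illustrative, not the exact trace schema):
+
+ ```python
+ def check_tool_called(step: dict, tool: str) -> bool:
+     # Pass if any recorded tool call in this step used the named tool.
+     return any(call["tool"] == tool for call in step.get("tool_calls", []))
+
+ def check_response_contains(step: dict, values: list[str]) -> bool:
+     # Pass if every expected substring appears in the recorded response.
+     return all(v in step.get("response", "") for v in values)
+
+ recorded_step = {
+     "tool_calls": [{"tool": "calculate", "args": {"expression": "6*7"}}],
+     "response": "6 times 7 is 42.",
+ }
+ assert check_tool_called(recorded_step, "calculate")
+ assert check_response_contains(recorded_step, ["42"])
+ ```
+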
+ ## Invariant mining
+
+ Instead of writing expectations by hand, let TraceForge figure them out:
+
+ ```bash
+ $ traceforge mine calculator_agent -v
+
+ ╭────────────── Invariant Mining Report ───────────────╮
+ │ Traces analyzed: 15 (15 passing, 0 failing) │
+ │ Invariants discovered: 5 │
+ │ │
+ │ - 'calculate' is always called at step 0 │
+ │ - 'calculate' is called 1-5 times per run │
+ │ - 'calculate.expression' is always non-empty │
+ │ - Step 0 response length is 30-48 chars │
+ │ - Step 0 latency is under 3916ms │
+ ╰──────────────────────────────────────────────────────╯
+ ```
+
+ Run enough traces and the miner will find rules that hold in all passing traces but break in failing ones. Those are your bugs.
+
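+ To make "mining" concrete, here is the gist of one rule type: a latency bound that holds on every passing trace and flags traces outside it. The real `invariants.py` covers more rule types; the field name and numbers here are illustrative:
+
+ ```python
+ def mine_latency_bound(passing: list[dict]) -> int:
+     # The invariant is simply the worst latency ever observed in a passing run.
+     return max(t["latency_ms"] for t in passing)
+
+ def violates(trace: dict, bound: int) -> bool:
+     return trace["latency_ms"] > bound
+
+ passing = [{"latency_ms": 1059}, {"latency_ms": 898}, {"latency_ms": 1246}]
+ failing = {"latency_ms": 4200}
+
+ bound = mine_latency_bound(passing)     # 1246
+ print(bound, violates(failing, bound))  # 1246 True: a rule that separates pass from fail
+ ```
+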
+ ## Causal attribution
+
+ When a trace fails, TraceForge can run counterfactual experiments — change one thing at a time, re-run the agent, and see what flips the outcome.
+
+ ```bash
+ $ traceforge attribute <trace-id> --scenario ./scenarios/
+
+ ╭────────────── Causal Attribution Report ─────────────╮
+ │ Failing step: 2 | Interventions: 23 | Flips: 7 │
+ │ │
+ │ CAUSAL FACTOR SENSITIVITY │
+ │ tool_output_value 40% │
+ │ tool_output_format 0% │
+ │ system_prompt_clause 0% │
+ ╰──────────────────────────────────────────────────────╯
+ ```
+
+ "40% of value changes flipped the outcome. Format and prompt don't matter." Now you know where to look.
+
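+ The sensitivity numbers above boil down to a per-factor ratio: of the interventions that perturbed a factor, how many flipped the failing run to passing. A minimal sketch of that loop, assuming a hypothetical `run_with` callback that re-runs the agent with a single intervention applied (the real `attribution.py` also generates the interventions itself):
+
+ ```python
+ from collections import defaultdict
+
+ def sensitivity(interventions: list[dict], run_with) -> dict[str, float]:
+     """Fraction of interventions per factor that flip a failing run to passing.
+
+     Both arguments are stand-ins: each intervention names the factor it perturbs
+     (tool_output_value, tool_output_format, ...), and run_with(iv) re-runs the
+     agent with that single change applied and returns "pass" or "fail".
+     """
+     flips, totals = defaultdict(int), defaultdict(int)
+     for iv in interventions:
+         totals[iv["factor"]] += 1
+         if run_with(iv) == "pass":        # outcome flipped relative to the failing baseline
+             flips[iv["factor"]] += 1
+     return {f: flips[f] / totals[f] for f in totals}
+
+ # Toy data shaped like the report: value changes sometimes flip, format never does.
+ ivs = [{"factor": "tool_output_value", "id": i} for i in range(5)] + \
+       [{"factor": "tool_output_format", "id": i} for i in range(5)]
+ fake_run = lambda iv: "pass" if iv["factor"] == "tool_output_value" and iv["id"] < 2 else "fail"
+ print(sensitivity(ivs, fake_run))  # {'tool_output_value': 0.4, 'tool_output_format': 0.0}
+ ```
+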
+ ## Requirements
+
+ - Python 3.12+
+ - Ollama running locally
+ - A pulled model (tested with `qwen2.5:7b-instruct`)
+
+ ## Tests
+
+ ```bash
+ pytest tests/ -v
+ ```
+
+ 183 tests; they run in about a second.
+
+ ## License
+
+ MIT
@@ -0,0 +1,51 @@
+ [build-system]
+ requires = ["setuptools>=68.0", "wheel"]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "traceforge"
+ version = "0.2.0"
+ description = "Test harness for AI agents that call tools. Record, replay, fuzz, and debug."
+ readme = "README.md"
+ license = {text = "MIT"}
+ requires-python = ">=3.12"
+ authors = [{name = "Abhimanyu Bhagwati", email = "abhimanyu@vt.edu"}]
+ keywords = ["ai", "agents", "testing", "llm", "ollama", "evaluation", "fuzzing", "replay", "debugging"]
+ classifiers = [
+     "Development Status :: 3 - Alpha",
+     "Intended Audience :: Developers",
+     "License :: OSI Approved :: MIT License",
+     "Programming Language :: Python :: 3",
+     "Topic :: Software Development :: Testing",
+     "Topic :: Software Development :: Debuggers",
+ ]
+
+ [project.urls]
+ Homepage = "https://github.com/AbhimanyuBhagwati/TraceForge"
+ Repository = "https://github.com/AbhimanyuBhagwati/TraceForge"
+ Issues = "https://github.com/AbhimanyuBhagwati/TraceForge/issues"
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=8.0",
+     "pytest-asyncio>=0.23",
+     "pytest-cov>=4.0",
+ ]
+
+ dependencies = [
+     "click>=8.0",
+     "ollama>=0.1.6",
+     "pydantic>=2.0",
+     "pyyaml>=6.0",
+     "rich>=13.0",
+     "jinja2>=3.0",
+ ]
+
+ [project.scripts]
+ traceforge = "traceforge.cli:main"
+
+ [tool.setuptools.packages.find]
+ where = ["src"]
+
+ [tool.pytest.ini_options]
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,3 @@
+ """TraceForge: Deterministic replay, fuzzing & failure minimization for local AI agents."""
+
+ __version__ = "0.2.0"