synkro-0.4.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (58)
  1. synkro/__init__.py +165 -0
  2. synkro/cli.py +120 -0
  3. synkro/core/__init__.py +7 -0
  4. synkro/core/dataset.py +233 -0
  5. synkro/core/policy.py +337 -0
  6. synkro/errors.py +178 -0
  7. synkro/examples/__init__.py +148 -0
  8. synkro/factory.py +160 -0
  9. synkro/formatters/__init__.py +12 -0
  10. synkro/formatters/qa.py +85 -0
  11. synkro/formatters/sft.py +90 -0
  12. synkro/formatters/tool_call.py +127 -0
  13. synkro/generation/__init__.py +9 -0
  14. synkro/generation/generator.py +163 -0
  15. synkro/generation/planner.py +87 -0
  16. synkro/generation/responses.py +160 -0
  17. synkro/generation/scenarios.py +90 -0
  18. synkro/generation/tool_responses.py +370 -0
  19. synkro/generation/tool_simulator.py +114 -0
  20. synkro/llm/__init__.py +7 -0
  21. synkro/llm/client.py +235 -0
  22. synkro/llm/rate_limits.py +95 -0
  23. synkro/models/__init__.py +43 -0
  24. synkro/models/anthropic.py +26 -0
  25. synkro/models/google.py +19 -0
  26. synkro/models/openai.py +31 -0
  27. synkro/modes/__init__.py +15 -0
  28. synkro/modes/config.py +66 -0
  29. synkro/modes/qa.py +18 -0
  30. synkro/modes/sft.py +18 -0
  31. synkro/modes/tool_call.py +18 -0
  32. synkro/parsers.py +442 -0
  33. synkro/pipeline/__init__.py +20 -0
  34. synkro/pipeline/phases.py +237 -0
  35. synkro/pipeline/runner.py +198 -0
  36. synkro/pipelines.py +105 -0
  37. synkro/prompts/__init__.py +44 -0
  38. synkro/prompts/base.py +167 -0
  39. synkro/prompts/qa_templates.py +97 -0
  40. synkro/prompts/templates.py +281 -0
  41. synkro/prompts/tool_templates.py +201 -0
  42. synkro/quality/__init__.py +14 -0
  43. synkro/quality/grader.py +130 -0
  44. synkro/quality/refiner.py +137 -0
  45. synkro/quality/tool_grader.py +126 -0
  46. synkro/quality/tool_refiner.py +128 -0
  47. synkro/reporting.py +213 -0
  48. synkro/schemas.py +325 -0
  49. synkro/types/__init__.py +41 -0
  50. synkro/types/core.py +113 -0
  51. synkro/types/dataset_type.py +30 -0
  52. synkro/types/tool.py +94 -0
  53. synkro-0.4.5.data/data/examples/__init__.py +148 -0
  54. synkro-0.4.5.dist-info/METADATA +221 -0
  55. synkro-0.4.5.dist-info/RECORD +58 -0
  56. synkro-0.4.5.dist-info/WHEEL +4 -0
  57. synkro-0.4.5.dist-info/entry_points.txt +2 -0
  58. synkro-0.4.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,201 @@
+ """Prompt templates for tool call trace generation."""
+
+ # =============================================================================
+ # TOOL SCENARIO GENERATION
+ # =============================================================================
+
+ TOOL_SCENARIO_PROMPT = """You are an expert at creating realistic scenarios that require tool usage.
+
+ Given a set of available tools and usage guidelines, generate diverse scenarios that test when and how to use these tools correctly.
+
+ AVAILABLE TOOLS:
+ {TOOLS_DESCRIPTION}
+
+ USAGE GUIDELINES:
+ {GUIDELINES}
+
+ Generate scenarios that cover:
+
+ 1. **Clear Tool Use Cases** - Situations where a specific tool is clearly needed
+ 2. **Tool Selection** - Scenarios requiring choosing between multiple tools
+ 3. **No Tool Needed** - Cases where the assistant should respond directly without tools
+ 4. **Multi-Tool Workflows** - Complex tasks requiring multiple tool calls
+ 5. **Parameter Variations** - Different parameter combinations and edge cases
+ 6. **Error Handling** - What to do when tools return errors or unexpected results
+
+ Each scenario should include:
+ - A realistic user request
+ - Context about what information is available vs what needs to be looked up
+ - Expected tool usage pattern (or lack thereof)
+
+ Focus on creating "golden traces" - perfect examples of correct tool usage."""
+
+ TOOL_CATEGORY_SCENARIO_PROMPT = """You are an expert at creating realistic scenarios for tool usage.
+
+ Generate scenarios specifically for the following CATEGORY:
+ **Category Name**: {CATEGORY_NAME}
+ **Category Description**: {CATEGORY_DESCRIPTION}
+
+ AVAILABLE TOOLS:
+ {TOOLS_DESCRIPTION}
+
+ USAGE GUIDELINES:
+ {GUIDELINES}
+
+ Create scenarios that:
+ - Are deeply relevant to this specific category
+ - Test the nuances of tool usage in this context
+ - Include realistic user requests with appropriate context
+ - Cover both happy paths and edge cases within this category"""
+
+ # =============================================================================
+ # TOOL RESPONSE GENERATION
+ # =============================================================================
+
+ TOOL_RESPONSE_PROMPT = """You are generating a training example for teaching an AI assistant to use tools correctly.
+
+ AVAILABLE TOOLS:
+ {TOOLS_DESCRIPTION}
+
+ USAGE GUIDELINES:
+ {GUIDELINES}
+
+ SCENARIO:
+ {SCENARIO}
+
+ USER REQUEST:
+ {USER_REQUEST}
+
+ Generate a complete conversation that demonstrates correct tool usage:
+
+ 1. If a tool should be called:
+    - The assistant's first response should include appropriate tool_calls
+    - Include the simulated tool response
+    - The assistant should then synthesize the tool results into a helpful response
+
+ 2. If no tool is needed:
+    - The assistant should respond directly with helpful information
+    - Explain why no tool lookup was necessary
+
+ The assistant should:
+ - Only call tools when necessary (don't call tools for information you already know)
+ - Use correct parameters with proper types
+ - Wait for tool results before providing final answers
+ - Synthesize tool results naturally without exposing raw data
+ - Handle missing or partial information gracefully
+
+ Output as JSON with this structure:
+ {{
+   "messages": [
+     {{"role": "system", "content": "..."}},
+     {{"role": "user", "content": "..."}},
+     {{"role": "assistant", "content": null, "tool_calls": [...]}},  // if tool needed
+     {{"role": "tool", "tool_call_id": "...", "content": "..."}},  // tool result
+     {{"role": "assistant", "content": "..."}}  // final response
+   ]
+ }}"""
+
+ # =============================================================================
+ # TOOL GRADING
+ # =============================================================================
+
+ TOOL_GRADE_PROMPT = """You are a strict evaluator of tool usage in AI assistant responses.
+
+ AVAILABLE TOOLS:
+ {TOOLS_DESCRIPTION}
+
+ USAGE GUIDELINES:
+ {GUIDELINES}
+
+ SCENARIO:
+ {SCENARIO}
+
+ CONVERSATION TO GRADE:
+ {CONVERSATION}
+
+ Evaluate the assistant's tool usage on these criteria:
+
+ 1. **Tool Selection** (Did they use the right tool?)
+    - Chose appropriate tool for the task
+    - Didn't use tools when not needed
+    - Used all necessary tools
+
+ 2. **Parameter Accuracy** (Were the parameters correct?)
+    - Correct parameter types
+    - Sensible parameter values
+    - Required parameters included
+
+ 3. **Response Synthesis** (Did they use tool results correctly?)
+    - Accurately incorporated tool results
+    - Didn't hallucinate beyond tool data
+    - Provided helpful, complete response
+
+ 4. **Timing** (Did they call tools at the right time?)
+    - Called tools before making claims
+    - Didn't call tools for known information
+    - Efficient tool call ordering
+
+ A response PASSES only if ALL criteria are met.
+
+ Grade this response."""
+
+ # =============================================================================
+ # TOOL REFINEMENT
+ # =============================================================================
+
+ TOOL_REFINE_PROMPT = """You are improving a tool-calling conversation that failed quality checks.
+
+ AVAILABLE TOOLS:
+ {TOOLS_DESCRIPTION}
+
+ USAGE GUIDELINES:
+ {GUIDELINES}
+
+ ORIGINAL SCENARIO:
+ {SCENARIO}
+
+ FAILED CONVERSATION:
+ {CONVERSATION}
+
+ ISSUES FOUND:
+ {ISSUES}
+
+ GRADER FEEDBACK:
+ {FEEDBACK}
+
+ Generate an IMPROVED conversation that fixes all the issues while maintaining the same user request.
+
+ Focus on:
+ - Correct tool selection
+ - Accurate parameters
+ - Proper synthesis of tool results
+ - No hallucination beyond tool data
+
+ Output the corrected conversation as JSON."""
+
+ # =============================================================================
+ # TOOL SIMULATION
+ # =============================================================================
+
+ TOOL_SIMULATION_PROMPT = """You are simulating a tool response for training data generation.
+
+ TOOL BEING CALLED:
+ Name: {TOOL_NAME}
+ Description: {TOOL_DESCRIPTION}
+ Parameters: {TOOL_PARAMETERS}
+
+ CALL ARGUMENTS:
+ {ARGUMENTS}
+
+ EXAMPLE RESPONSES (for reference):
+ {MOCK_RESPONSES}
+
+ Generate a realistic, plausible response that this tool would return for the given arguments.
+
+ The response should:
+ - Be realistic and internally consistent
+ - Match the type of data this tool would return
+ - Include appropriate detail level
+ - Handle edge cases gracefully (e.g., no results found)
+
+ Return only the tool response content as a string."""
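
These templates are consumed with plain str.format: the single-brace fields ({TOOLS_DESCRIPTION}, {GUIDELINES}, and so on) are substituted at generation time, while the doubled braces in the JSON skeleton survive formatting as literal braces. A minimal sketch of filling TOOL_SIMULATION_PROMPT, assuming the package is installed; the weather_lookup tool and all its values below are illustrative placeholders, not part of the package:

    from synkro.prompts.tool_templates import TOOL_SIMULATION_PROMPT

    # Hypothetical tool metadata; real values come from a ToolDefinition
    # and the tool call being simulated.
    prompt = TOOL_SIMULATION_PROMPT.format(
        TOOL_NAME="weather_lookup",
        TOOL_DESCRIPTION="Returns current weather for a city.",
        TOOL_PARAMETERS='{"city": {"type": "string"}}',
        ARGUMENTS='{"city": "Berlin"}',
        MOCK_RESPONSES='{"temp_c": 18, "conditions": "cloudy"}',
    )
    # `prompt` is now ready to send to the simulator LLM.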
@@ -0,0 +1,14 @@
+ """Quality control components for trace grading and refinement."""
+
+ from synkro.quality.grader import Grader
+ from synkro.quality.refiner import Refiner
+ from synkro.quality.tool_grader import ToolCallGrader
+ from synkro.quality.tool_refiner import ToolCallRefiner
+
+ __all__ = [
+     "Grader",
+     "Refiner",
+     "ToolCallGrader",
+     "ToolCallRefiner",
+ ]
+
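
This __init__ re-exports the four quality components, so callers can pull them from the package root in one line:

    from synkro.quality import Grader, Refiner, ToolCallGrader, ToolCallRefiner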
@@ -0,0 +1,130 @@
+ """Grading of generated traces for quality control."""
+
+ from synkro.llm.client import LLM
+ from synkro.models import Model, OpenAI
+ from synkro.types.core import Trace, GradeResult
+ from synkro.prompts.templates import BATCHED_GRADER_PROMPT
+ from synkro.schemas import SingleGrade
+ from synkro.parsers import parse_batched_grades
+
+
+ class Grader:
+     """
+     Grades generated traces for quality and policy compliance.
+
+     Uses an LLM to evaluate each trace against strict criteria:
+     - Policy compliance
+     - Proper citations
+     - Complete reasoning
+     - Actionable recommendations
+
+     Examples:
+         >>> grader = Grader()
+         >>> result = await grader.grade(trace, policy.text)
+         >>> if result.passed:
+         ...     print("Trace passes quality checks!")
+     """
+
+     def __init__(self, llm: LLM | None = None, model: Model = OpenAI.GPT_4O):
+         """
+         Initialize the grader.
+
+         Args:
+             llm: LLM client to use (creates one if not provided)
+             model: Model to use if creating LLM (recommend stronger model for grading)
+         """
+         self.llm = llm or LLM(model=model)
+
+     async def grade(self, trace: Trace, policy_text: str) -> GradeResult:
+         """
+         Grade a single trace.
+
+         Args:
+             trace: The trace to grade
+             policy_text: The policy text to grade against
+
+         Returns:
+             GradeResult with pass/fail and feedback
+         """
+         prompt = f"""You are a strict evaluator. Grade this response.
+
+ A response PASSES only if ALL are true:
+ 1. Policy Compliant - Every recommendation follows the policy exactly
+ 2. Fully Supported - Every claim backed by specific policy section
+ 3. Properly Cited - All relevant policy sections referenced
+ 4. Complete Reasoning - Chain of thought has no gaps
+ 5. Actionable & Specific - Recommendations are concrete, not vague
+
+ SCENARIO:
+ {trace.scenario.description}
+
+ POLICY:
+ {policy_text}
+
+ RESPONSE TO GRADE:
+ {trace.assistant_message}
+
+ Grade this response."""
+
+         try:
+             # Use structured output for reliable grading
+             parsed = await self.llm.generate_structured(prompt, SingleGrade)
+             return GradeResult(
+                 passed=parsed.passed,
+                 issues=(
+                     parsed.policy_violations
+                     + parsed.missing_citations
+                     + parsed.incomplete_reasoning
+                     + parsed.vague_recommendations
+                 ),
+                 feedback=parsed.feedback,
+             )
+         except Exception:
+             # Fallback: assume fail if we can't parse
+             return GradeResult(
+                 passed=False,
+                 issues=["Unable to parse grade response"],
+                 feedback="Grading failed - unable to parse response",
+             )
+
+     async def grade_batch(
+         self, traces: list[Trace], policy_text: str
+     ) -> list[GradeResult]:
+         """
+         Grade multiple traces.
+
+         Args:
+             traces: List of traces to grade
+             policy_text: The policy text to grade against
+
+         Returns:
+             List of GradeResults in same order as input
+         """
+         results = []
+
+         for trace in traces:
+             result = await self.grade(trace, policy_text)
+             results.append(result)
+
+         return results
+
+     async def grade_batch_parallel(
+         self, traces: list[Trace], policy_text: str
+     ) -> list[GradeResult]:
+         """
+         Grade multiple traces in parallel.
+
+         More efficient for large batches but uses more API calls concurrently.
+
+         Args:
+             traces: List of traces to grade
+             policy_text: The policy text to grade against
+
+         Returns:
+             List of GradeResults in same order as input
+         """
+         import asyncio
+
+         tasks = [self.grade(trace, policy_text) for trace in traces]
+         return await asyncio.gather(*tasks)
+
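
For orientation, a minimal async driver for the class above; `traces` and `policy_text` are assumed to come from earlier pipeline phases and are not constructed here:

    import asyncio

    from synkro.quality import Grader

    async def grade_all(traces, policy_text):
        grader = Grader()  # defaults to OpenAI.GPT_4O, per __init__ above
        # grade_batch_parallel fans out one grade() call per trace via asyncio.gather
        results = await grader.grade_batch_parallel(traces, policy_text)
        return [t for t, r in zip(traces, results) if r.passed]

    # passing = asyncio.run(grade_all(traces, policy.text))  # caller supplies inputs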
@@ -0,0 +1,137 @@
+ """Refinement of failed traces based on grader feedback."""
+
+ from synkro.llm.client import LLM
+ from synkro.models import Model, OpenAI
+ from synkro.types.core import Trace, GradeResult, Message
+ from synkro.prompts.templates import BATCHED_REFINER_PROMPT, SYSTEM_PROMPT
+ from synkro.parsers import parse_single_response, extract_content
+
+
+ class Refiner:
+     """
+     Refines traces that failed grading.
+
+     Takes failed traces and their grader feedback and generates
+     improved versions that address the issues.
+
+     Examples:
+         >>> refiner = Refiner()
+         >>> improved = await refiner.refine(failed_trace, grade_result, policy.text)
+     """
+
+     def __init__(self, llm: LLM | None = None, model: Model = OpenAI.GPT_4O_MINI):
+         """
+         Initialize the refiner.
+
+         Args:
+             llm: LLM client to use (creates one if not provided)
+             model: Model to use if creating LLM
+         """
+         self.llm = llm or LLM(model=model)
+         self.prompt_template = BATCHED_REFINER_PROMPT
+
+     async def refine(
+         self, trace: Trace, grade: GradeResult, policy_text: str
+     ) -> Trace:
+         """
+         Refine a failed trace based on grader feedback.
+
+         Args:
+             trace: The trace that failed grading
+             grade: The grade result with feedback
+             policy_text: The policy text
+
+         Returns:
+             New trace with improved response
+         """
+         prompt = self._build_prompt(trace, grade, policy_text)
+
+         response = await self.llm.generate(prompt)
+         parsed = parse_single_response(response)
+
+         if parsed and len(parsed.messages) >= 3:
+             messages = [
+                 Message(role=m.role, content=m.content) for m in parsed.messages
+             ]
+         else:
+             # Fallback: construct from response
+             content = extract_content(response)
+             messages = [
+                 Message(role="system", content=SYSTEM_PROMPT),
+                 Message(
+                     role="user",
+                     content=f"Scenario: {trace.scenario.description}\n\nContext: {trace.scenario.context}",
+                 ),
+                 Message(role="assistant", content=content),
+             ]
+
+         return Trace(messages=messages, scenario=trace.scenario)
+
+     def _build_prompt(
+         self, trace: Trace, grade: GradeResult, policy_text: str
+     ) -> str:
+         """Build the refinement prompt."""
+         return f"""You are improving a response that failed quality checks.
+
+ SCENARIO:
+ {trace.scenario.description}
+
+ CONTEXT:
+ {trace.scenario.context}
+
+ ORIGINAL RESPONSE:
+ {trace.assistant_message}
+
+ GRADER FEEDBACK:
+ Issues: {', '.join(grade.issues) if grade.issues else 'None listed'}
+ Summary: {grade.feedback}
+
+ POLICY:
+ {policy_text}
+
+ Generate an IMPROVED response that fixes all the issues. Output a JSON object:
+ {{
+   "messages": [
+     {{"role": "system", "content": "<system prompt>"}},
+     {{"role": "user", "content": "<the scenario>"}},
+     {{"role": "assistant", "content": "<your IMPROVED response>"}}
+   ]
+ }}
+
+ The improved response must:
+ - Fix all policy violations
+ - Add missing citations
+ - Complete reasoning with no gaps
+ - Make recommendations specific and actionable
+ - Keep what was correct from the original
+
+ Respond with ONLY the JSON object."""
+
+     async def refine_batch(
+         self,
+         traces: list[Trace],
+         grades: list[GradeResult],
+         policy_text: str,
+     ) -> list[Trace]:
+         """
+         Refine multiple failed traces.
+
+         Args:
+             traces: List of traces that failed grading
+             grades: Corresponding grade results
+             policy_text: The policy text
+
+         Returns:
+             List of refined traces
+         """
+         refined = []
+
+         for trace, grade in zip(traces, grades):
+             if not grade.passed:
+                 improved = await self.refine(trace, grade, policy_text)
+                 refined.append(improved)
+             else:
+                 refined.append(trace)
+
+         return refined
+
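
The Refiner slots in after the Grader in a grade-then-refine pass; a sketch assuming a single refinement round (refine_batch already passes through traces that graded as passing):

    from synkro.quality import Grader, Refiner

    async def grade_and_refine(traces, policy_text):
        grader, refiner = Grader(), Refiner()
        grades = await grader.grade_batch_parallel(traces, policy_text)
        # Only failed traces are rewritten; passing ones are returned unchanged.
        return await refiner.refine_batch(traces, grades, policy_text)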
@@ -0,0 +1,126 @@
+ """Specialized grading for tool call traces."""
+
+ import json
+ from typing import TYPE_CHECKING
+
+ from synkro.quality.grader import Grader
+ from synkro.llm.client import LLM
+ from synkro.models import Model, OpenAI
+ from synkro.types.core import Trace, GradeResult
+ from synkro.schemas import ToolCallGrade
+ from synkro.prompts.tool_templates import TOOL_GRADE_PROMPT
+
+ if TYPE_CHECKING:
+     from synkro.types.tool import ToolDefinition
+
+
+ class ToolCallGrader(Grader):
+     """
+     Specialized grader for tool call traces.
+
+     Evaluates tool usage on four criteria:
+     - Tool Selection: Did they use the right tool?
+     - Parameter Accuracy: Were the parameters correct?
+     - Response Synthesis: Did they use tool results correctly?
+     - Timing: Did they call tools at the right time?
+
+     Examples:
+         >>> grader = ToolCallGrader(tools=[web_search, db_lookup])
+         >>> result = await grader.grade(trace, policy_text)
+         >>> if not result.passed:
+         ...     print(f"Issues: {result.issues}")
+     """
+
+     def __init__(
+         self,
+         tools: list["ToolDefinition"],
+         llm: LLM | None = None,
+         model: Model = OpenAI.GPT_4O,
+     ):
+         """
+         Initialize the tool call grader.
+
+         Args:
+             tools: List of available tool definitions (for context)
+             llm: LLM client to use (creates one if not provided)
+             model: Model to use if creating LLM (recommend stronger model)
+         """
+         super().__init__(llm=llm, model=model)
+         self.tools = tools
+
+     def _get_tools_description(self) -> str:
+         """Get formatted description of all tools for grading context."""
+         descriptions = []
+         for tool in self.tools:
+             descriptions.append(tool.to_system_prompt())
+         return "\n\n".join(descriptions)
+
+     def _format_conversation(self, trace: Trace) -> str:
+         """Format the trace messages for the grading prompt, including tool_calls."""
+         lines = []
+         for msg in trace.messages:
+             if msg.role == "system":
+                 lines.append(f"[SYSTEM]\n{msg.content}")
+             elif msg.role == "user":
+                 lines.append(f"[USER]\n{msg.content}")
+             elif msg.role == "assistant":
+                 if msg.tool_calls:
+                     # Format assistant message with tool calls
+                     tool_calls_str = []
+                     for tc in msg.tool_calls:
+                         tool_calls_str.append(
+                             f"  - {tc.function.name}({tc.function.arguments})"
+                         )
+                     lines.append(
+                         "[ASSISTANT - TOOL CALLS]\n" + "\n".join(tool_calls_str)
+                     )
+                 else:
+                     lines.append(f"[ASSISTANT]\n{msg.content}")
+             elif msg.role == "tool":
+                 lines.append(
+                     f"[TOOL RESULT - {msg.tool_call_id}]\n{msg.content}"
+                 )
+         return "\n\n".join(lines)
+
+     async def grade(self, trace: Trace, policy_text: str) -> GradeResult:
+         """
+         Grade a tool call trace using tool-specific criteria.
+
+         Args:
+             trace: The trace to grade
+             policy_text: The policy/guidelines text
+
+         Returns:
+             GradeResult with pass/fail and detailed feedback
+         """
+         tools_desc = self._get_tools_description()
+         conversation = self._format_conversation(trace)
+
+         prompt = TOOL_GRADE_PROMPT.format(
+             TOOLS_DESCRIPTION=tools_desc,
+             GUIDELINES=policy_text,
+             SCENARIO=trace.scenario.description,
+             CONVERSATION=conversation,
+         )
+
+         try:
+             # Use structured output for consistent grading
+             parsed = await self.llm.generate_structured(prompt, ToolCallGrade)
+
+             # Convert to standard GradeResult format
+             return GradeResult(
+                 passed=parsed.passed,
+                 issues=parsed.get_all_issues(),
+                 feedback=parsed.feedback,
+             )
+         except Exception:
+             # Fallback: assume fail if we can't parse
+             return GradeResult(
+                 passed=False,
+                 issues=["Unable to parse grade response"],
+                 feedback="Grading failed - unable to parse response",
+             )
+
+
+ __all__ = ["ToolCallGrader"]
+
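
For reference, _format_conversation renders a trace as a bracket-tagged transcript before it is embedded into TOOL_GRADE_PROMPT's {CONVERSATION} slot. A single-tool-call trace would come out roughly like this (all values illustrative, not from the package):

    [USER]
    What's the weather in Berlin?

    [ASSISTANT - TOOL CALLS]
      - weather_lookup({"city": "Berlin"})

    [TOOL RESULT - call_1]
    {"temp_c": 18, "conditions": "cloudy"}

    [ASSISTANT]
    It's currently 18 °C and cloudy in Berlin.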