synkro 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synkro/__init__.py +179 -0
- synkro/advanced.py +186 -0
- synkro/cli.py +128 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +402 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +276 -0
- synkro/formatters/__init__.py +12 -0
- synkro/formatters/qa.py +98 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +220 -0
- synkro/generation/golden_responses.py +244 -0
- synkro/generation/golden_scenarios.py +276 -0
- synkro/generation/golden_tool_responses.py +416 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +376 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +12 -0
- synkro/interactive/hitl_session.py +77 -0
- synkro/interactive/logic_map_editor.py +173 -0
- synkro/interactive/rich_ui.py +205 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +235 -0
- synkro/llm/rate_limits.py +95 -0
- synkro/models/__init__.py +43 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +15 -0
- synkro/modes/config.py +66 -0
- synkro/modes/qa.py +18 -0
- synkro/modes/sft.py +18 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +424 -0
- synkro/pipelines.py +123 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +474 -0
- synkro/prompts/interactive_templates.py +65 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/qa_templates.py +97 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +201 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +537 -0
- synkro/schemas.py +472 -0
- synkro/types/__init__.py +41 -0
- synkro/types/core.py +126 -0
- synkro/types/dataset_type.py +30 -0
- synkro/types/logic_map.py +345 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.12.data/data/examples/__init__.py +148 -0
- synkro-0.4.12.dist-info/METADATA +258 -0
- synkro-0.4.12.dist-info/RECORD +77 -0
- synkro-0.4.12.dist-info/WHEEL +4 -0
- synkro-0.4.12.dist-info/entry_points.txt +2 -0
- synkro-0.4.12.dist-info/licenses/LICENSE +21 -0
synkro/schemas.py
ADDED
@@ -0,0 +1,472 @@
"""Pydantic schemas for structured LLM outputs and validation."""

from typing import Literal
from pydantic import BaseModel, Field


# =============================================================================
# SCENARIO SCHEMAS
# =============================================================================


class ScenarioOutput(BaseModel):
    """Output schema for scenario generation."""

    scenario: str = Field(description="Detailed scenario description")
    context: str = Field(description="Relevant background information")


class ScenariosArray(BaseModel):
    """Array of generated scenarios."""

    scenarios: list[ScenarioOutput]


# =============================================================================
# POLICY ANALYSIS SCHEMAS
# =============================================================================


class PolicyComplexity(BaseModel):
    """Policy complexity analysis for auto-detecting optimal turns."""

    variable_count: int = Field(
        description="Number of variables/conditions in the policy (rules, exceptions, conditions)"
    )
    complexity_level: Literal["simple", "conditional", "complex"] = Field(
        description="Overall complexity: simple (1 var), conditional (2-3 vars), complex (4+ vars)"
    )
    recommended_turns: int = Field(
        ge=1, le=6, description="Recommended conversation turns based on complexity"
    )
    reasoning: str = Field(description="Brief explanation of the complexity assessment")


class PlanCategory(BaseModel):
    """A category in the generation plan."""

    name: str = Field(description='Short category name (e.g., "Consent Violations", "Edge Cases")')
    description: str = Field(description="What this category tests")
    traces: int = Field(ge=1, description="Number of traces to generate for this category")


class PolicyPlan(BaseModel):
    """LLM-generated plan for dataset generation."""

    categories: list[PlanCategory] = Field(
        min_length=2, max_length=10, description="Scenario categories with trace allocations"
    )
    reasoning: str = Field(
        description="Explanation of why these categories were chosen based on policy content"
    )


# =============================================================================
# CHAT MESSAGE SCHEMAS
# =============================================================================


class ChatMessage(BaseModel):
    """A single chat message in OpenAI format."""

    role: Literal["system", "user", "assistant"] = Field(description="Message role")
    content: str = Field(description="Message content")


class ConversationOutput(BaseModel):
    """Output from response generation - a complete conversation."""

    index: int = Field(description="Scenario index (0-based)")
    messages: list[ChatMessage] = Field(
        description="Full conversation with system, user, and assistant messages"
    )


class BatchedConversations(BaseModel):
    """Batch of generated conversations."""

    conversations: list[ConversationOutput]


# =============================================================================
# GRADING SCHEMAS
# =============================================================================


class GradeOutput(BaseModel):
    """Grading result for a single response."""

    index: int = Field(description="Scenario index (0-based)")
    passed: bool = Field(
        alias="pass", description="Is the response FULLY correct, policy-compliant, and format-valid?"
    )
    policy_violations: list[str] = Field(
        default_factory=list,
        description="Specific policy rules that were violated or misinterpreted",
    )
    missing_citations: list[str] = Field(
        default_factory=list,
        description="Policy sections that should have been cited but were not",
    )
    incomplete_reasoning: list[str] = Field(
        default_factory=list, description="Logical gaps or missing steps in the chain of thought"
    )
    vague_recommendations: list[str] = Field(
        default_factory=list,
        description="Recommendations that need to be more specific or actionable",
    )
    feedback: str = Field(description="Summary of how to fix the issues")

    class Config:
        populate_by_name = True


class BatchedGrades(BaseModel):
    """Batch of grading results."""

    grades: list[GradeOutput]


# =============================================================================
# SINGLE-ITEM SCHEMAS (for parallel generation)
# =============================================================================


class SingleResponse(BaseModel):
    """Single response output for parallel generation."""

    messages: list[ChatMessage] = Field(
        min_length=3, max_length=3, description="Exactly 3 messages: system, user, assistant"
    )


class MultiTurnResponse(BaseModel):
    """Multi-turn response output for complexity-driven generation."""

    messages: list[ChatMessage] = Field(
        min_length=3,
        description="Conversation messages (variable length based on turn count)"
    )
    turn_count: int = Field(
        ge=1, le=10,
        description="Number of user-assistant exchanges in this conversation"
    )


class SingleGrade(BaseModel):
    """Single grade output for parallel generation."""

    passed: bool = Field(
        alias="pass", description="Is the response FULLY correct, policy-compliant, and format-valid?"
    )
    policy_violations: list[str] = Field(
        default_factory=list, description="Specific policy rules that were violated"
    )
    missing_citations: list[str] = Field(
        default_factory=list, description="Policy sections that should have been cited"
    )
    incomplete_reasoning: list[str] = Field(
        default_factory=list, description="Logical gaps or missing reasoning steps"
    )
    vague_recommendations: list[str] = Field(
        default_factory=list, description="Recommendations that need to be more specific"
    )
    feedback: str = Field(description='Summary of issues or "Correct" if passing')

    class Config:
        populate_by_name = True


# =============================================================================
# MULTI-TURN SCHEMAS
# =============================================================================


class FollowUpQuestion(BaseModel):
    """A follow-up question for multi-turn conversations."""

    index: int = Field(description="Scenario index")
    question: str = Field(description="Follow-up question from the user")
    question_type: Literal["clarification", "edge_case", "what_if", "specificity", "challenge"] = (
        Field(description="Type of follow-up")
    )


class TurnGrade(BaseModel):
    """Grade for a single turn in a multi-turn conversation."""

    turn_index: int = Field(description="Which turn (0-based, only assistant turns)")
    passed: bool = Field(alias="pass", description="Does this turn pass all criteria?")
    policy_violations: list[str] = Field(
        default_factory=list, description="Policy violations in this turn"
    )
    missing_citations: list[str] = Field(
        default_factory=list, description="Missing citations in this turn"
    )
    incomplete_reasoning: list[str] = Field(
        default_factory=list, description="Reasoning gaps in this turn"
    )
    vague_recommendations: list[str] = Field(
        default_factory=list, description="Vague recommendations in this turn"
    )
    feedback: str = Field(description="Specific feedback for this turn")

    class Config:
        populate_by_name = True


class ConversationGrade(BaseModel):
    """Full grading for a multi-turn conversation."""

    index: int = Field(description="Scenario index")
    overall_pass: bool = Field(description="Does the ENTIRE conversation pass?")
    turn_grades: list[TurnGrade] = Field(description="Grade for each assistant turn")
    coherence_pass: bool = Field(
        description="Is the conversation coherent with no contradictions?"
    )
    coherence_issues: list[str] = Field(
        default_factory=list, description="Any contradictions or incoherence across turns"
    )
    progressive_depth: bool = Field(
        description="Does each turn build on previous context appropriately?"
    )
    overall_feedback: str = Field(
        description="Summary of what needs to be fixed across the conversation"
    )


# =============================================================================
# AGENTIC SCHEMAS
# =============================================================================


class ToolCall(BaseModel):
    """A tool call in an agentic trace."""

    tool_name: str = Field(description="Name of the tool to call")
    arguments: dict[str, str] = Field(description="Arguments to pass to the tool")


class AgenticStep(BaseModel):
    """A single step in an agentic trace."""

    reasoning: str = Field(description="Reasoning before tool call")
    tool_name: str = Field(description="Tool to call")
    tool_args: dict = Field(description="Tool arguments")


class AgenticTrace(BaseModel):
    """Complete agentic trace with tool usage."""

    index: int = Field(description="Scenario index")
    steps: list[AgenticStep] = Field(description="Steps of tool usage")
    final_answer: str = Field(description="Final comprehensive answer")


# =============================================================================
# TOOL CALL GRADING SCHEMAS
# =============================================================================


class ToolCallGrade(BaseModel):
    """Grading result for a tool call trace.

    Evaluates tool usage on four criteria:
    - Tool Selection: Did they use the right tool?
    - Parameter Accuracy: Were the parameters correct?
    - Response Synthesis: Did they use tool results correctly?
    - Timing: Did they call tools at the right time?
    """

    passed: bool = Field(
        alias="pass",
        description="Does the trace pass ALL criteria?"
    )

    # Criterion 1: Tool Selection
    tool_selection_correct: bool = Field(
        description="Did the assistant choose the appropriate tool for the task?"
    )
    tool_selection_issues: list[str] = Field(
        default_factory=list,
        description="Specific issues with tool selection (wrong tool, missing tool, unnecessary tool)"
    )

    # Criterion 2: Parameter Accuracy
    parameters_valid: bool = Field(
        description="Were the tool parameters correct (types, values, required fields)?"
    )
    parameter_issues: list[str] = Field(
        default_factory=list,
        description="Specific issues with parameters (wrong type, invalid value, missing required)"
    )

    # Criterion 3: Response Synthesis
    synthesis_accurate: bool = Field(
        description="Did the assistant correctly use tool results without hallucination?"
    )
    synthesis_issues: list[str] = Field(
        default_factory=list,
        description="Specific issues with synthesis (hallucinated data, ignored results, misinterpreted)"
    )

    # Criterion 4: Timing
    timing_appropriate: bool = Field(
        description="Did the assistant call tools at the right moment?"
    )
    timing_issues: list[str] = Field(
        default_factory=list,
        description="Specific issues with timing (premature call, delayed call, should have called earlier)"
    )

    # Overall feedback
    feedback: str = Field(
        description="Summary of issues or 'Correct' if passing"
    )

    class Config:
        populate_by_name = True

    def get_all_issues(self) -> list[str]:
        """Get all issues combined."""
        return (
            self.tool_selection_issues
            + self.parameter_issues
            + self.synthesis_issues
            + self.timing_issues
        )


# =============================================================================
# GOLDEN TRACE SCHEMAS
# =============================================================================


class RuleExtraction(BaseModel):
    """A single rule extracted from the policy."""

    rule_id: str = Field(description="Unique identifier (e.g., 'R001')")
    text: str = Field(description="Exact rule text from the policy")
    condition: str = Field(description="The 'if' part - when this rule applies")
    action: str = Field(description="The 'then' part - what happens")
    dependencies: list[str] = Field(
        default_factory=list,
        description="Rule IDs that must be evaluated before this rule"
    )
    category: Literal["constraint", "permission", "procedure", "exception"] = Field(
        description="Type of rule"
    )


class LogicMapOutput(BaseModel):
    """Output schema for logic extraction - the complete DAG of rules."""

    rules: list[RuleExtraction] = Field(
        description="All rules extracted from the policy"
    )
    root_rules: list[str] = Field(
        description="Rule IDs with no dependencies (entry points)"
    )
    reasoning: str = Field(
        description="Explanation of rule extraction and relationship identification"
    )


class RefinedLogicMapOutput(BaseModel):
    """Output schema for Logic Map refinement based on user feedback."""

    rules: list[RuleExtraction] = Field(
        description="All rules in the refined Logic Map (modified and unmodified)"
    )
    root_rules: list[str] = Field(
        description="Rule IDs with no dependencies (entry points)"
    )
    changes_summary: str = Field(
        description="Brief summary of changes made (e.g., 'Added R009, removed R005')"
    )
    reasoning: str = Field(
        description="Explanation of how user feedback was interpreted and applied"
    )


class GoldenScenarioOutput(BaseModel):
    """Output schema for a single golden scenario."""

    description: str = Field(description="The user's request or question")
    context: str = Field(default="", description="Additional context")
    scenario_type: Literal["positive", "negative", "edge_case", "irrelevant"] = Field(
        description="Type of scenario"
    )
    target_rule_ids: list[str] = Field(
        description="Rule IDs this scenario tests"
    )
    expected_outcome: str = Field(
        description="Expected behavior based on rules"
    )


class GoldenScenariosArray(BaseModel):
    """Array of generated golden scenarios."""

    scenarios: list[GoldenScenarioOutput]


class ReasoningStepOutput(BaseModel):
    """A single step in the Chain-of-Thought reasoning."""

    rule_id: str = Field(description="The rule being evaluated")
    rule_text: str = Field(description="The text of the rule")
    applies: bool = Field(description="Whether this rule applies")
    reasoning: str = Field(description="Why the rule does/doesn't apply")
    exclusions: list[str] = Field(
        default_factory=list,
        description="Rule IDs excluded because this rule applies"
    )


class GoldenTraceOutput(BaseModel):
    """Output schema for a golden trace with grounded reasoning."""

    messages: list[ChatMessage] = Field(
        description="The conversation messages"
    )
    reasoning_chain: list[ReasoningStepOutput] = Field(
        description="Step-by-step reasoning with rule citations"
    )
    rules_applied: list[str] = Field(
        description="Rule IDs that were applied in the response"
    )
    rules_excluded: list[str] = Field(
        default_factory=list,
        description="Rule IDs that were explicitly excluded and why"
    )


class VerificationOutput(BaseModel):
    """Output schema for trace verification against Logic Map."""

    passed: bool = Field(description="Whether the trace passed verification")
    issues: list[str] = Field(
        default_factory=list,
        description="List of issues found"
    )
    skipped_rules: list[str] = Field(
        default_factory=list,
        description="Rule IDs that should have been applied but weren't"
    )
    hallucinated_rules: list[str] = Field(
        default_factory=list,
        description="Rule IDs cited that don't exist or don't apply"
    )
    contradictions: list[str] = Field(
        default_factory=list,
        description="Logical contradictions found"
    )
    rules_verified: list[str] = Field(
        default_factory=list,
        description="Rule IDs correctly applied"
    )
    feedback: str = Field(
        description="Summary of verification result"
    )
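Note on the `alias="pass"` pattern used by GradeOutput, SingleGrade, TurnGrade, and ToolCallGrade: `pass` is a reserved word in Python, so the field is exposed to the LLM as "pass" but stored as `passed`, and `populate_by_name` lets either name validate. A minimal sketch (not part of the package) of how the alias round-trips:

from synkro.schemas import GradeOutput

# JSON as an LLM would emit it, using the "pass" alias
raw = {"index": 0, "pass": True, "feedback": "Correct"}

grade = GradeOutput(**raw)         # validates via the alias
assert grade.passed is True        # stored under the Python-safe field name
assert grade.model_dump(by_alias=True)["pass"] is True  # serializes back as "pass"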
synkro/types/__init__.py
ADDED
@@ -0,0 +1,41 @@
"""Type definitions for Synkro.

Usage:
    from synkro.types import DatasetType, Message, Trace
    from synkro.types import ToolDefinition, ToolCall, ToolFunction
"""

from synkro.types.core import (
    Role,
    Message,
    Scenario,
    Trace,
    GradeResult,
    Plan,
    Category,
)
from synkro.types.dataset_type import DatasetType
from synkro.types.tool import (
    ToolDefinition,
    ToolCall,
    ToolFunction,
    ToolResult,
)

__all__ = [
    # Dataset type
    "DatasetType",
    # Core types
    "Role",
    "Message",
    "Scenario",
    "Trace",
    "GradeResult",
    "Plan",
    "Category",
    # Tool types
    "ToolDefinition",
    "ToolCall",
    "ToolFunction",
    "ToolResult",
]
synkro/types/core.py
ADDED
@@ -0,0 +1,126 @@
"""Core Pydantic models for Synkro."""

from typing import Literal, Any
from pydantic import BaseModel, Field


Role = Literal["system", "user", "assistant", "tool"]


class Message(BaseModel):
    """
    A single message in a conversation.

    Supports both regular chat messages and tool-calling messages.

    Examples:
        >>> # Regular message
        >>> Message(role="user", content="Hello")

        >>> # Assistant with tool call (tool_calls is list of dicts or ToolCall objects)
        >>> Message(role="assistant", content=None, tool_calls=[...])

        >>> # Tool response
        >>> Message(role="tool", content="Result", tool_call_id="call_123")
    """

    role: Role
    content: str | None = None
    tool_calls: list[Any] | None = Field(
        default=None,
        description="Tool calls made by the assistant (list of ToolCall or dicts)"
    )
    tool_call_id: str | None = Field(
        default=None,
        description="ID of the tool call this message responds to (for tool role)"
    )

    def model_post_init(self, __context) -> None:
        """Validate message structure based on role."""
        # For backwards compatibility, ensure content is string for non-tool roles
        if self.role in ("system", "user") and self.content is None:
            self.content = ""


class Scenario(BaseModel):
    """A test scenario for trace generation."""

    description: str = Field(description="The scenario description")
    context: str = Field(description="Additional context and background")
    category: str | None = Field(default=None, description="Category this scenario belongs to")


class GradeResult(BaseModel):
    """Result of grading a trace."""

    passed: bool = Field(description="Whether the trace passes quality checks")
    issues: list[str] = Field(default_factory=list, description="List of issues found")
    feedback: str = Field(default="", description="Summary feedback for improvement")


class Trace(BaseModel):
    """A complete training trace with messages and metadata."""

    messages: list[Message] = Field(description="The conversation messages")
    scenario: Scenario = Field(description="The scenario this trace was generated from")
    grade: GradeResult | None = Field(default=None, description="Grading result if graded")

    # Golden Trace metadata (for verification)
    reasoning_chain: list[Any] | None = Field(default=None, description="Chain-of-thought reasoning steps with rule citations")
    rules_applied: list[str] | None = Field(default=None, description="Rule IDs that were applied in the response")
    rules_excluded: list[str] | None = Field(default=None, description="Rule IDs that were explicitly excluded")

    @property
    def system_message(self) -> str | None:
        """Get the system message content."""
        for m in self.messages:
            if m.role == "system":
                return m.content
        return None

    @property
    def user_message(self) -> str:
        """Get the first user message content."""
        for m in self.messages:
            if m.role == "user":
                return m.content or ""
        return ""

    @property
    def assistant_message(self) -> str:
        """Get the last assistant message content."""
        for m in reversed(self.messages):
            if m.role == "assistant":
                return m.content or ""
        return ""

    @property
    def has_tool_calls(self) -> bool:
        """Check if this trace contains any tool calls."""
        for m in self.messages:
            if m.tool_calls:
                return True
        return False


class Category(BaseModel):
    """A category for organizing scenarios."""

    name: str = Field(description="Category name")
    description: str = Field(description="What this category tests")
    count: int = Field(description="Number of traces to generate for this category")


class Plan(BaseModel):
    """A generation plan with categories and complexity analysis."""

    categories: list[Category] = Field(description="Categories with trace allocations")
    reasoning: str = Field(description="Explanation of why these categories were chosen")
    recommended_turns: int = Field(
        default=1,
        description="Recommended conversation turns based on policy complexity"
    )
    complexity_level: Literal["simple", "conditional", "complex"] = Field(
        default="simple",
        description="Policy complexity level: simple (1-2 turns), conditional (3 turns), complex (5+ turns)"
    )
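A minimal usage sketch (not part of the diff) of Trace's convenience properties; the policy text and scenario below are invented for illustration:

from synkro.types.core import Message, Scenario, Trace

trace = Trace(
    messages=[
        Message(role="system", content="Follow the refund policy."),
        Message(role="user", content="Can I get a refund after 40 days?"),
        Message(role="assistant", content="No - refunds are only available within 30 days."),
    ],
    scenario=Scenario(description="Late refund request", context="Order placed 40 days ago"),
)
assert trace.user_message == "Can I get a refund after 40 days?"  # first user turn
assert trace.assistant_message.startswith("No")                   # last assistant turn
assert trace.has_tool_calls is False                              # no message carries tool_calls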
synkro/types/dataset_type.py
ADDED
@@ -0,0 +1,30 @@
"""Dataset type enum for steering generation pipeline."""

from enum import Enum


class DatasetType(str, Enum):
    """
    Type of dataset to generate.

    The dataset type determines:
    - Prompts used for scenario and response generation
    - Grading criteria
    - Output format and schema

    Examples:
        >>> from synkro import DatasetType
        >>> synkro.generate(policy, dataset_type=DatasetType.QA)
        >>> synkro.generate(policy, dataset_type=DatasetType.SFT)
        >>> synkro.generate(policy, dataset_type=DatasetType.TOOL_CALL, tools=[...])
    """

    QA = "qa"
    """Question-Answer pairs: {question, answer, context}"""

    SFT = "sft"
    """Supervised Fine-Tuning: {messages: [system, user, assistant]}"""

    TOOL_CALL = "tool_call"
    """Tool Calling: {messages: [..., {tool_calls: [...]}, {role: tool}, ...]}"""
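Because DatasetType mixes in str, members compare equal to their raw string values, so plain strings from configs or CLI flags interoperate with the enum. A minimal sketch (not part of the diff):

from synkro.types.dataset_type import DatasetType

assert DatasetType.QA == "qa"                             # str mixin: equal to its value
assert DatasetType("tool_call") is DatasetType.TOOL_CALL  # lookup by value
print(DatasetType.SFT.value)                              # -> "sft"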
|