synkro 0.4.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synkro might be problematic. Click here for more details.
- synkro/__init__.py +331 -0
- synkro/advanced.py +184 -0
- synkro/cli.py +156 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +432 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +291 -0
- synkro/formatters/__init__.py +18 -0
- synkro/formatters/chatml.py +121 -0
- synkro/formatters/langfuse.py +98 -0
- synkro/formatters/langsmith.py +98 -0
- synkro/formatters/qa.py +112 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +314 -0
- synkro/generation/golden_responses.py +269 -0
- synkro/generation/golden_scenarios.py +333 -0
- synkro/generation/golden_tool_responses.py +791 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +625 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +16 -0
- synkro/interactive/hitl_session.py +205 -0
- synkro/interactive/intent_classifier.py +94 -0
- synkro/interactive/logic_map_editor.py +176 -0
- synkro/interactive/rich_ui.py +459 -0
- synkro/interactive/scenario_editor.py +198 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +309 -0
- synkro/llm/rate_limits.py +99 -0
- synkro/models/__init__.py +50 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/local.py +104 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +13 -0
- synkro/modes/config.py +66 -0
- synkro/modes/conversation.py +35 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +769 -0
- synkro/pipelines.py +136 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +533 -0
- synkro/prompts/interactive_templates.py +198 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +318 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +464 -0
- synkro/schemas.py +521 -0
- synkro/types/__init__.py +43 -0
- synkro/types/core.py +153 -0
- synkro/types/dataset_type.py +33 -0
- synkro/types/logic_map.py +348 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.36.data/data/examples/__init__.py +148 -0
- synkro-0.4.36.dist-info/METADATA +507 -0
- synkro-0.4.36.dist-info/RECORD +81 -0
- synkro-0.4.36.dist-info/WHEEL +4 -0
- synkro-0.4.36.dist-info/entry_points.txt +2 -0
- synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,592 @@
|
|
|
1
|
+
"""Pipeline phases for generation.
|
|
2
|
+
|
|
3
|
+
Each phase is a self-contained, testable unit that handles one step
|
|
4
|
+
of the generation pipeline.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
from asyncio import Semaphore
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
from synkro.core.policy import Policy
|
|
12
|
+
from synkro.types.core import Plan, Scenario, Trace
|
|
13
|
+
from synkro.generation.planner import Planner
|
|
14
|
+
from synkro.generation.scenarios import ScenarioGenerator
|
|
15
|
+
from synkro.generation.responses import ResponseGenerator
|
|
16
|
+
from synkro.quality.grader import Grader
|
|
17
|
+
from synkro.quality.refiner import Refiner
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from synkro.generation.tool_responses import ToolCallResponseGenerator
|
|
21
|
+
from synkro.generation.logic_extractor import LogicExtractor
|
|
22
|
+
from synkro.generation.golden_scenarios import GoldenScenarioGenerator
|
|
23
|
+
from synkro.generation.golden_responses import GoldenResponseGenerator
|
|
24
|
+
from synkro.generation.golden_tool_responses import GoldenToolCallResponseGenerator
|
|
25
|
+
from synkro.quality.verifier import TraceVerifier
|
|
26
|
+
from synkro.quality.golden_refiner import GoldenRefiner
|
|
27
|
+
from synkro.types.logic_map import LogicMap, GoldenScenario
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class PlanPhase:
    """
    Planning step of the pipeline.

    The phase is stateless: it hands the policy text, the requested trace
    count and the ``analyze_turns`` flag to a ``Planner`` and returns
    whatever plan the planner produces.
    """

    async def execute(
        self,
        policy: Policy,
        traces: int,
        planner: Planner,
        analyze_turns: bool = True,
    ) -> Plan:
        """
        Produce a plan for ``policy`` by delegating to ``planner.plan``.

        Args:
            policy: Policy whose ``text`` attribute is sent to the planner
            traces: Target number of traces, passed through unchanged
            planner: Component exposing an awaitable ``plan`` method
            analyze_turns: Forwarded to the planner as the ``analyze_turns``
                keyword (presumably toggles turn-count analysis there;
                confirm against ``Planner.plan``)

        Returns:
            The value returned by ``planner.plan``
        """
        policy_text = policy.text
        plan = await planner.plan(
            policy_text,
            traces,
            analyze_turns=analyze_turns,
        )
        return plan
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ScenarioPhase:
    """
    Scenario step of the pipeline - one generator request per plan category.

    All category requests are awaited together with ``asyncio.gather``; the
    shared semaphore bounds how many generator calls are in flight at once.
    """

    async def execute(
        self,
        policy: Policy,
        plan: Plan,
        generator: ScenarioGenerator,
        semaphore: Semaphore,
    ) -> list[Scenario]:
        """
        Generate scenarios for every category in ``plan``.

        Args:
            policy: Policy whose ``text`` is passed to the generator
            plan: Plan whose ``categories`` are iterated; each category's
                ``count`` is the number of scenarios requested for it
            generator: Component exposing an awaitable ``generate`` method
            semaphore: Semaphore held around each generator call

        Returns:
            A single flat list with the scenarios of all categories, in
            category order
        """
        async def generate_for(category):
            # The semaphore slot is held only while the generator call runs.
            async with semaphore:
                return await generator.generate(
                    policy.text,
                    category.count,
                    category=category,
                )

        batches = await asyncio.gather(
            *(generate_for(category) for category in plan.categories)
        )

        # gather() keeps input order, so batches line up with plan.categories.
        flattened = []
        for batch in batches:
            flattened.extend(batch)
        return flattened
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class ResponsePhase:
    """
    Response step of the pipeline - one generator call per scenario.

    Every scenario is processed concurrently under the shared semaphore.
    ``target_turns`` is handed to the generator unchanged, so single-turn
    versus multi-turn behaviour is decided by the generator.
    """

    async def execute(
        self,
        policy: Policy,
        scenarios: list[Scenario],
        generator: ResponseGenerator,
        semaphore: Semaphore,
        target_turns: int = 1,
    ) -> list[Trace]:
        """
        Generate one trace per scenario.

        Args:
            policy: Policy whose ``text`` is passed to the generator
            scenarios: Scenarios to respond to
            generator: ResponseGenerator component
            semaphore: Semaphore held around each generator call
            target_turns: Number of conversation turns, forwarded as-is

        Returns:
            Generator results in the same order as ``scenarios``
        """
        async def respond_to(scenario):
            async with semaphore:
                # NOTE(review): this relies on the generator's
                # underscore-prefixed `_generate_single`; confirm whether a
                # public entry point is intended.
                return await generator._generate_single(
                    policy.text, scenario, target_turns
                )

        traces = await asyncio.gather(
            *(respond_to(scenario) for scenario in scenarios)
        )
        return traces
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class GradingPhase:
    """
    Grading and refinement phase - evaluates and improves responses.

    Every trace is graded once; traces whose grade did not pass are then
    refined and re-graded in a loop until all pass or the iteration budget
    is used up.
    """

    async def execute(
        self,
        policy: Policy,
        traces: list[Trace],
        grader: Grader,
        refiner: Refiner,
        max_iterations: int,
        semaphore: Semaphore,
    ) -> tuple[list[Trace], float]:
        """
        Execute grading and refinement.

        Args:
            policy: Policy whose ``text`` is passed to grader and refiner
            traces: List of traces to grade; each trace's ``grade``
                attribute is assigned in place
            grader: Grader component
            refiner: Refiner component
            max_iterations: Iteration budget counting the initial grading
                pass, so at most ``max_iterations - 1`` refinement rounds
                run (a value of 1 or less disables refinement)
            semaphore: Semaphore held around each grader/refiner call

        Returns:
            Tuple of (graded traces, pass rate percentage). The pass rate
            is always a float; it is 0.0 when there are no traces.
        """
        async def limited_grade(trace):
            async with semaphore:
                return await grader.grade(trace, policy.text)

        async def limited_refine(trace, grade):
            async with semaphore:
                return await refiner.refine(trace, grade, policy.text)

        # Work on a copy of the list so refined traces can replace failed
        # ones without touching the caller's list object.
        final_traces = list(traces)

        # Initial grading
        grades = await asyncio.gather(*(limited_grade(t) for t in final_traces))
        for trace, grade in zip(final_traces, grades):
            trace.grade = grade

        # Refinement loop (starts at 1: the initial grading was iteration 0)
        for _ in range(1, max_iterations):
            failed_indices = [
                i for i, t in enumerate(final_traces) if not t.grade.passed
            ]
            if not failed_indices:
                break

            # Refine failed traces
            refined_traces = await asyncio.gather(
                *(
                    limited_refine(final_traces[i], final_traces[i].grade)
                    for i in failed_indices
                )
            )

            # Preserve original scenarios and update traces
            for idx, refined in zip(failed_indices, refined_traces):
                refined.scenario = final_traces[idx].scenario
                final_traces[idx] = refined

            # Re-grade only the traces that were just refined
            new_grades = await asyncio.gather(
                *(limited_grade(final_traces[i]) for i in failed_indices)
            )
            for idx, grade in zip(failed_indices, new_grades):
                final_traces[idx].grade = grade

        # Calculate pass rate (0.0, not int 0, keeps the declared float type)
        passed_count = sum(1 for t in final_traces if t.grade and t.grade.passed)
        pass_rate = (
            passed_count / len(final_traces) * 100 if final_traces else 0.0
        )

        return final_traces, pass_rate
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class ToolCallResponsePhase:
    """
    Tool-call response step of the pipeline - one generator call per scenario.

    The message layout of the resulting traces (system/user/assistant/tool
    messages) is produced entirely by the supplied generator; this phase
    only fans the scenarios out concurrently under the shared semaphore and
    forwards ``target_turns``.
    """

    async def execute(
        self,
        policy: Policy,
        scenarios: list[Scenario],
        generator: "ToolCallResponseGenerator",
        semaphore: Semaphore,
        target_turns: int = 1,
    ) -> list[Trace]:
        """
        Generate one tool-call trace per scenario.

        Args:
            policy: Policy whose ``text`` is passed to the generator
            scenarios: Scenarios to respond to
            generator: ToolCallResponseGenerator component
            semaphore: Semaphore held around each generator call
            target_turns: Number of conversation turns, forwarded as the
                ``target_turns`` keyword

        Returns:
            Generator results in the same order as ``scenarios``
        """
        async def respond_to(scenario):
            async with semaphore:
                return await generator.generate_single(
                    policy.text,
                    scenario,
                    target_turns=target_turns,
                )

        traces = await asyncio.gather(
            *(respond_to(scenario) for scenario in scenarios)
        )
        return traces
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
# =============================================================================
|
|
259
|
+
# GOLDEN TRACE PHASES
|
|
260
|
+
# =============================================================================
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
class LogicExtractionPhase:
    """
    Logic Extraction phase (The Cartographer) - Stage 1 of the Golden Trace
    pipeline.

    The phase is a stateless delegate: the policy text goes to the
    extractor and the extractor's result is returned untouched.
    """

    async def execute(
        self,
        policy: Policy,
        extractor: "LogicExtractor",
    ) -> "LogicMap":
        """
        Extract a logic map from ``policy``.

        Args:
            policy: Policy whose ``text`` is passed to the extractor
            extractor: Component exposing an awaitable ``extract`` method

        Returns:
            The value returned by ``extractor.extract``
        """
        logic_map = await extractor.extract(policy.text)
        return logic_map
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
class GoldenScenarioPhase:
    """
    Golden Scenario phase (The Adversary) - generates typed scenarios.

    This is Stage 2 of the Golden Trace pipeline. One generator request is
    issued per plan category; the results are flattened and tallied by
    scenario type.

    NOTE(review): earlier documentation cites a target mix of 35% positive,
    30% negative, 25% edge_case and 10% irrelevant. Nothing in this class
    enforces that ratio - confirm it against GoldenScenarioGenerator.
    """

    async def execute(
        self,
        policy: Policy,
        logic_map: "LogicMap",
        plan: Plan,
        generator: "GoldenScenarioGenerator",
        semaphore: Semaphore,
    ) -> tuple[list["GoldenScenario"], dict[str, int]]:
        """
        Execute golden scenario generation.

        Args:
            policy: Policy whose ``text`` is passed to the generator
            logic_map: The extracted Logic Map, forwarded to the generator
            plan: Plan whose ``categories`` are iterated; each category's
                ``count`` is the number of scenarios requested for it
            generator: GoldenScenarioGenerator component
            semaphore: Semaphore held around each generator call

        Returns:
            Tuple of (scenarios, type distribution dict). The dict always
            contains the keys "positive", "negative", "edge_case" and
            "irrelevant"; any other scenario type value that occurs is
            added as an extra key instead of raising KeyError.
        """
        async def limited_generate(category):
            async with semaphore:
                return await generator.generate(
                    policy.text, logic_map, category, category.count
                )

        results = await asyncio.gather(
            *(limited_generate(cat) for cat in plan.categories)
        )

        # Flatten scenarios
        all_scenarios = [s for batch in results for s in batch]

        # Calculate distribution. The four known types are pre-seeded so
        # they are reported even when their count is zero.
        distribution = {
            "positive": 0,
            "negative": 0,
            "edge_case": 0,
            "irrelevant": 0,
        }
        for scenario in all_scenarios:
            type_name = scenario.scenario_type.value
            # .get() keeps an unexpected type from aborting the run after
            # all generation work has already been done.
            distribution[type_name] = distribution.get(type_name, 0) + 1

        return all_scenarios, distribution
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
class GoldenTracePhase:
    """
    Golden Trace phase (The Thinker) - Stage 3 of the Golden Trace pipeline
    for CONVERSATION/INSTRUCTION.

    The reasoning content of each trace comes from the supplied generator;
    this phase only runs one generator call per scenario, concurrently,
    under the shared semaphore.
    """

    async def execute(
        self,
        policy: Policy,
        logic_map: "LogicMap",
        scenarios: list["GoldenScenario"],
        generator: "GoldenResponseGenerator",
        semaphore: Semaphore,
        target_turns: int = 1,
    ) -> list[Trace]:
        """
        Generate one golden trace per scenario.

        Args:
            policy: Policy whose ``text`` is passed to the generator
            logic_map: The extracted Logic Map, forwarded to the generator
            scenarios: Golden scenarios to respond to
            generator: GoldenResponseGenerator component
            semaphore: Semaphore held around each generator call
            target_turns: Number of conversation turns, forwarded as-is

        Returns:
            Generator results in the same order as ``scenarios``
        """
        async def trace_for(scenario):
            async with semaphore:
                trace = await generator.generate_single(
                    policy.text, logic_map, scenario, target_turns
                )
            return trace

        pending = [trace_for(scenario) for scenario in scenarios]
        traces = await asyncio.gather(*pending)
        return traces
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
class GoldenToolCallPhase:
    """
    Golden Tool Call phase (The Thinker for Tools) - Stage 3 of the Golden
    Trace pipeline for TOOL_CALL.

    The tool-call format and reasoning of each trace come from the supplied
    generator; this phase only runs one generator call per scenario,
    concurrently, under the shared semaphore.
    """

    async def execute(
        self,
        policy: Policy,
        logic_map: "LogicMap",
        scenarios: list["GoldenScenario"],
        generator: "GoldenToolCallResponseGenerator",
        semaphore: Semaphore,
        target_turns: int = 1,
    ) -> list[Trace]:
        """
        Generate one golden tool-call trace per scenario.

        Args:
            policy: Policy whose ``text`` is passed to the generator
            logic_map: The extracted Logic Map, forwarded to the generator
            scenarios: Golden scenarios to respond to
            generator: GoldenToolCallResponseGenerator component
            semaphore: Semaphore held around each generator call
            target_turns: Number of conversation turns, forwarded as-is

        Returns:
            Generator results in the same order as ``scenarios``
        """
        async def tool_trace_for(scenario):
            async with semaphore:
                trace = await generator.generate_single(
                    policy.text, logic_map, scenario, target_turns
                )
            return trace

        pending = [tool_trace_for(scenario) for scenario in scenarios]
        traces = await asyncio.gather(*pending)
        return traces
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
class VerificationPhase:
    """
    Verification phase (The Auditor) - verifies traces against Logic Map.

    This is Stage 4 of the Golden Trace pipeline. Each trace is verified
    and graded by the supplied verifier; traces whose verification did not
    pass are refined and re-verified in a loop until all pass or the
    iteration budget is used up. The individual checks are implemented by
    the verifier, not by this class.
    """

    @staticmethod
    def _fallback_scenario(trace: Trace) -> "GoldenScenario":
        """Build a minimal GoldenScenario from a trace's own scenario."""
        # Imported at call time: the module-level import of these names is
        # guarded by TYPE_CHECKING and is not available at runtime.
        from synkro.types.logic_map import GoldenScenario, ScenarioType

        source = trace.scenario
        return GoldenScenario(
            description=source.description,
            context=source.context or "",
            category=source.category or "",
            scenario_type=ScenarioType.POSITIVE,
            target_rule_ids=[],
            expected_outcome="",
        )

    async def execute(
        self,
        policy: Policy,
        logic_map: "LogicMap",
        scenarios: list["GoldenScenario"],
        traces: list[Trace],
        verifier: "TraceVerifier",
        refiner: "GoldenRefiner",
        max_iterations: int,
        semaphore: Semaphore,
    ) -> tuple[list[Trace], float]:
        """
        Execute verification and refinement.

        Args:
            policy: The policy (not read by this method)
            logic_map: The Logic Map, forwarded to verifier and refiner
            scenarios: The golden scenarios; matched to traces by
                ``description``
            traces: List of traces to verify; each trace's ``grade``
                attribute is assigned in place
            verifier: TraceVerifier component
            refiner: GoldenRefiner component
            max_iterations: Iteration budget counting the initial
                verification pass, so at most ``max_iterations - 1``
                refinement rounds run (a value of 1 or less disables
                refinement)
            semaphore: Semaphore held around each verifier/refiner call

        Returns:
            Tuple of (verified traces, pass rate percentage). The pass rate
            is always a float; it is 0.0 when there are no traces.
        """
        async def limited_verify(trace, scenario):
            async with semaphore:
                verification, grade = await verifier.verify_and_grade(
                    trace, logic_map, scenario
                )
            return verification, grade

        async def limited_refine(trace, scenario, verification):
            async with semaphore:
                return await refiner.refine(
                    trace, logic_map, scenario, verification
                )

        # Create scenario lookup by matching trace.scenario.description
        scenario_lookup = {s.description: s for s in scenarios}

        # Resolve each trace's scenario once. Refined traces inherit the
        # scenario of the trace they replace (see below), so the scenario
        # at a given index stays valid for every later round.
        final_traces = list(traces)
        trace_scenarios = []
        for trace in final_traces:
            scenario = scenario_lookup.get(trace.scenario.description)
            if scenario is None:
                scenario = self._fallback_scenario(trace)
            trace_scenarios.append(scenario)

        # Initial verification
        results = await asyncio.gather(
            *(
                limited_verify(trace, scenario)
                for trace, scenario in zip(final_traces, trace_scenarios)
            )
        )

        # Attach grades and track verifications
        verifications = []
        for trace, (verification, grade) in zip(final_traces, results):
            trace.grade = grade
            verifications.append(verification)

        # Refinement loop (starts at 1: the initial pass was iteration 0)
        for _ in range(1, max_iterations):
            failed_indices = [
                i for i, v in enumerate(verifications) if not v.passed
            ]
            if not failed_indices:
                break

            # Refine failed traces
            refined_traces = await asyncio.gather(
                *(
                    limited_refine(
                        final_traces[i], trace_scenarios[i], verifications[i]
                    )
                    for i in failed_indices
                )
            )

            # Update traces, keeping the original scenario on each one
            for idx, refined in zip(failed_indices, refined_traces):
                refined.scenario = final_traces[idx].scenario
                final_traces[idx] = refined

            # Re-verify only the traces that were just refined
            new_results = await asyncio.gather(
                *(
                    limited_verify(final_traces[i], trace_scenarios[i])
                    for i in failed_indices
                )
            )
            for idx, (verification, grade) in zip(failed_indices, new_results):
                final_traces[idx].grade = grade
                verifications[idx] = verification

        # Calculate pass rate (0.0, not int 0, keeps the declared float type)
        passed_count = sum(1 for v in verifications if v.passed)
        pass_rate = (
            passed_count / len(verifications) * 100 if verifications else 0.0
        )

        return final_traces, pass_rate
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
# Public API of this module: the names exported by `from ... import *`.
__all__ = [
    # Standard generation phases
    "PlanPhase",
    "ScenarioPhase",
    "ResponsePhase",
    "GradingPhase",
    "ToolCallResponsePhase",
    # Golden Trace phases
    "LogicExtractionPhase",
    "GoldenScenarioPhase",
    "GoldenTracePhase",
    "GoldenToolCallPhase",
    "VerificationPhase",
]
|
|
592
|
+
|