synkro 0.4.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synkro/__init__.py +331 -0
- synkro/advanced.py +184 -0
- synkro/cli.py +156 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +432 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +291 -0
- synkro/formatters/__init__.py +18 -0
- synkro/formatters/chatml.py +121 -0
- synkro/formatters/langfuse.py +98 -0
- synkro/formatters/langsmith.py +98 -0
- synkro/formatters/qa.py +112 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +314 -0
- synkro/generation/golden_responses.py +269 -0
- synkro/generation/golden_scenarios.py +333 -0
- synkro/generation/golden_tool_responses.py +791 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +625 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +16 -0
- synkro/interactive/hitl_session.py +205 -0
- synkro/interactive/intent_classifier.py +94 -0
- synkro/interactive/logic_map_editor.py +176 -0
- synkro/interactive/rich_ui.py +459 -0
- synkro/interactive/scenario_editor.py +198 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +309 -0
- synkro/llm/rate_limits.py +99 -0
- synkro/models/__init__.py +50 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/local.py +104 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +13 -0
- synkro/modes/config.py +66 -0
- synkro/modes/conversation.py +35 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +769 -0
- synkro/pipelines.py +136 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +533 -0
- synkro/prompts/interactive_templates.py +198 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +318 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +464 -0
- synkro/schemas.py +521 -0
- synkro/types/__init__.py +43 -0
- synkro/types/core.py +153 -0
- synkro/types/dataset_type.py +33 -0
- synkro/types/logic_map.py +348 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.36.data/data/examples/__init__.py +148 -0
- synkro-0.4.36.dist-info/METADATA +507 -0
- synkro-0.4.36.dist-info/RECORD +81 -0
- synkro-0.4.36.dist-info/WHEEL +4 -0
- synkro-0.4.36.dist-info/entry_points.txt +2 -0
- synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
synkro/generation/golden_scenarios.py

@@ -0,0 +1,333 @@
"""Golden Scenario Generator - The Adversary.

Generates typed scenarios (positive, negative, edge_case, irrelevant)
with explicit rule targeting. This is Stage 2 of the Golden Trace pipeline.
"""

import asyncio
from typing import Literal

from synkro.llm.client import LLM
from synkro.models import Model, OpenAI
from synkro.schemas import GoldenScenariosArray
from synkro.types.core import Category
from synkro.types.logic_map import LogicMap, GoldenScenario, ScenarioType
from synkro.prompts.golden_templates import (
    GOLDEN_SCENARIO_PROMPT,
    GOLDEN_SCENARIO_BATCHED_PROMPT,
    POSITIVE_SCENARIO_INSTRUCTIONS,
    NEGATIVE_SCENARIO_INSTRUCTIONS,
    EDGE_CASE_SCENARIO_INSTRUCTIONS,
    IRRELEVANT_SCENARIO_INSTRUCTIONS,
)


# Default scenario type distribution
DEFAULT_DISTRIBUTION = {
    ScenarioType.POSITIVE: 0.35,  # 35% happy path
    ScenarioType.NEGATIVE: 0.30,  # 30% violations
    ScenarioType.EDGE_CASE: 0.25,  # 25% edge cases
    ScenarioType.IRRELEVANT: 0.10,  # 10% out of scope
}


TYPE_INSTRUCTIONS = {
    ScenarioType.POSITIVE: POSITIVE_SCENARIO_INSTRUCTIONS,
    ScenarioType.NEGATIVE: NEGATIVE_SCENARIO_INSTRUCTIONS,
    ScenarioType.EDGE_CASE: EDGE_CASE_SCENARIO_INSTRUCTIONS,
    ScenarioType.IRRELEVANT: IRRELEVANT_SCENARIO_INSTRUCTIONS,
}


class GoldenScenarioGenerator:
    """
    The Adversary - Generates typed scenarios with rule targeting.

    Produces scenarios across four types:
    - POSITIVE (35%): Happy path, all criteria met
    - NEGATIVE (30%): Violation, exactly one criterion fails
    - EDGE_CASE (25%): Boundary conditions, exact limits
    - IRRELEVANT (10%): Outside policy scope

    Each scenario includes:
    - Target rule IDs it's designed to test
    - Expected outcome based on the rules
    - Scenario type for classification

    Examples:
        >>> generator = GoldenScenarioGenerator(llm=LLM(model=OpenAI.GPT_4O_MINI))
        >>> scenarios = await generator.generate(
        ...     policy_text="...",
        ...     logic_map=logic_map,
        ...     category=category,
        ...     count=10,
        ... )
    """

    def __init__(
        self,
        llm: LLM | None = None,
        model: Model = OpenAI.GPT_4O_MINI,
        distribution: dict[ScenarioType, float] | None = None,
    ):
        """
        Initialize the Golden Scenario Generator.

        Args:
            llm: LLM client to use (creates one if not provided)
            model: Model to use if creating LLM
            distribution: Custom scenario type distribution (defaults to 35/30/25/10)
        """
        self.llm = llm or LLM(model=model, temperature=0.8)
        self.distribution = distribution or DEFAULT_DISTRIBUTION

    async def generate(
        self,
        policy_text: str,
        logic_map: LogicMap,
        category: Category,
        count: int,
    ) -> list[GoldenScenario]:
        """
        Generate scenarios for a category with balanced type distribution.

        Uses batched generation (single LLM call per category) for efficiency.

        Args:
            policy_text: The policy document text
            logic_map: The extracted Logic Map (DAG of rules)
            category: The category to generate scenarios for
            count: Total number of scenarios to generate

        Returns:
            List of GoldenScenarios with type distribution
        """
        # Calculate counts per type based on distribution
        type_counts = self._calculate_type_distribution(count)

        # Use batched generation (single call for all types)
        return await self._generate_batched(
            policy_text=policy_text,
            logic_map=logic_map,
            category=category,
            type_counts=type_counts,
        )

    def _calculate_type_distribution(self, total: int) -> dict[ScenarioType, int]:
        """Calculate how many scenarios of each type to generate."""
        counts = {}
        remaining = total

        # For small counts, prioritize non-IRRELEVANT types
        # IRRELEVANT should only appear when we have enough scenarios
        priority_order = [
            ScenarioType.POSITIVE,
            ScenarioType.NEGATIVE,
            ScenarioType.EDGE_CASE,
            ScenarioType.IRRELEVANT,  # Last priority
        ]

        if total <= 3:
            # For very small counts, assign one to each priority type until exhausted
            for stype in priority_order:
                if remaining > 0:
                    counts[stype] = 1
                    remaining -= 1
                else:
                    counts[stype] = 0
        else:
            # Normal distribution for larger counts
            for i, (stype, ratio) in enumerate(self.distribution.items()):
                if i == len(self.distribution) - 1:
                    # Last type gets remaining to ensure total is exact
                    counts[stype] = remaining
                else:
                    count = round(total * ratio)
                    counts[stype] = count
                    remaining -= count

        return counts

    async def _generate_batched(
        self,
        policy_text: str,
        logic_map: LogicMap,
        category: Category,
        type_counts: dict[ScenarioType, int],
    ) -> list[GoldenScenario]:
        """
        Generate all scenario types in a single LLM call (batched).

        This is more efficient than making separate calls per type.
        Includes retry logic if the LLM doesn't return the exact count.
        """
        # Format Logic Map for prompt
        logic_map_str = self._format_logic_map(logic_map)

        # Calculate total
        total_count = sum(type_counts.values())

        # Build batched prompt
        prompt = GOLDEN_SCENARIO_BATCHED_PROMPT.format(
            policy_text=policy_text,
            logic_map=logic_map_str,
            category=category.name,
            positive_count=type_counts.get(ScenarioType.POSITIVE, 0),
            negative_count=type_counts.get(ScenarioType.NEGATIVE, 0),
            edge_case_count=type_counts.get(ScenarioType.EDGE_CASE, 0),
            irrelevant_count=type_counts.get(ScenarioType.IRRELEVANT, 0),
            total_count=total_count,
        )

        # Generate with retry (max 1 retry if count is wrong)
        scenarios = await self._generate_and_parse(prompt, category.name, total_count)

        # Retry once if count doesn't match
        if len(scenarios) != total_count:
            retry_prompt = prompt + f"\n\nIMPORTANT: You must generate EXACTLY {total_count} scenarios. You previously generated {len(scenarios)}."
            retry_scenarios = await self._generate_and_parse(retry_prompt, category.name, total_count)

            # Use retry result if it's closer to target, otherwise keep original
            if abs(len(retry_scenarios) - total_count) < abs(len(scenarios) - total_count):
                scenarios = retry_scenarios

        # Truncate if over, accept if under (after retry)
        return scenarios[:total_count]

    async def _generate_and_parse(
        self,
        prompt: str,
        category_name: str,
        expected_count: int,
    ) -> list[GoldenScenario]:
        """Generate scenarios and parse to domain models."""
        result = await self.llm.generate_structured(prompt, GoldenScenariosArray)

        scenarios = []
        for s in result.scenarios:
            scenario = GoldenScenario(
                description=s.description,
                context=s.context,
                category=category_name,
                scenario_type=ScenarioType(s.scenario_type),
                target_rule_ids=s.target_rule_ids,
                expected_outcome=s.expected_outcome,
            )
            scenarios.append(scenario)

        return scenarios

    async def _generate_type(
        self,
        policy_text: str,
        logic_map: LogicMap,
        category: Category,
        scenario_type: ScenarioType,
        count: int,
    ) -> list[GoldenScenario]:
        """Generate scenarios of a specific type."""
        # Get type-specific instructions
        type_instructions = TYPE_INSTRUCTIONS[scenario_type]

        # Format Logic Map for prompt
        logic_map_str = self._format_logic_map(logic_map)

        # Build prompt
        prompt = GOLDEN_SCENARIO_PROMPT.format(
            scenario_type=scenario_type.value.upper(),
            policy_text=policy_text,
            logic_map=logic_map_str,
            category=category.name,
            count=count,
            type_specific_instructions=type_instructions,
        )

        # Generate structured output
        result = await self.llm.generate_structured(prompt, GoldenScenariosArray)

        # Convert to domain models
        scenarios = []
        for s in result.scenarios:
            scenario = GoldenScenario(
                description=s.description,
                context=s.context,
                category=category.name,
                scenario_type=ScenarioType(s.scenario_type),
                target_rule_ids=s.target_rule_ids,
                expected_outcome=s.expected_outcome,
            )
            scenarios.append(scenario)

        # Enforce requested count (LLM may return more or fewer)
        return scenarios[:count]

    def _format_logic_map(self, logic_map: LogicMap) -> str:
        """Format Logic Map for prompt inclusion."""
        lines = []
        lines.append("RULES:")
        for rule in logic_map.rules:
            deps = f" (depends on: {', '.join(rule.dependencies)})" if rule.dependencies else ""
            lines.append(
                f"  {rule.rule_id} [{rule.category.value}]: {rule.text}{deps}"
            )

        lines.append("\nROOT RULES (Entry Points):")
        lines.append(f"  {', '.join(logic_map.root_rules)}")

        return "\n".join(lines)

    async def generate_for_categories(
        self,
        policy_text: str,
        logic_map: LogicMap,
        categories: list[Category],
    ) -> tuple[list[GoldenScenario], dict[str, int]]:
        """
        Generate scenarios for multiple categories with distribution tracking.

        Args:
            policy_text: The policy document text
            logic_map: The extracted Logic Map
            categories: List of categories with counts

        Returns:
            Tuple of (all scenarios, type distribution counts)
        """
        # Generate for each category in parallel
        tasks = [
            self.generate(policy_text, logic_map, cat, cat.count)
            for cat in categories
        ]
        results = await asyncio.gather(*tasks)

        # Flatten scenarios
        all_scenarios = []
        for batch in results:
            all_scenarios.extend(batch)

        # Calculate distribution
        distribution = {
            ScenarioType.POSITIVE.value: 0,
            ScenarioType.NEGATIVE.value: 0,
            ScenarioType.EDGE_CASE.value: 0,
            ScenarioType.IRRELEVANT.value: 0,
        }
        for s in all_scenarios:
            distribution[s.scenario_type.value] += 1

        return all_scenarios, distribution

    def get_distribution_summary(self, scenarios: list[GoldenScenario]) -> dict[str, int]:
        """Get a summary of scenario type distribution."""
        distribution = {
            "positive": 0,
            "negative": 0,
            "edge_case": 0,
            "irrelevant": 0,
        }
        for s in scenarios:
            distribution[s.scenario_type.value] += 1
        return distribution


__all__ = ["GoldenScenarioGenerator", "DEFAULT_DISTRIBUTION"]
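
A note on the count allocation in _calculate_type_distribution above: for totals above 3, every type except the last gets round(total * ratio), and the last entry in the distribution dict absorbs the remainder so the counts always sum exactly to the requested total. A self-contained sketch of that arithmetic follows (plain strings stand in for the ScenarioType enum; this is an editorial illustration, not code from the wheel):

# Editorial sketch of the allocation arithmetic in
# _calculate_type_distribution; plain strings stand in for ScenarioType.
DEFAULT_DISTRIBUTION = {
    "positive": 0.35,
    "negative": 0.30,
    "edge_case": 0.25,
    "irrelevant": 0.10,
}

def calculate_type_distribution(total: int) -> dict[str, int]:
    counts: dict[str, int] = {}
    remaining = total
    if total <= 3:
        # Tiny budgets: one scenario per type, in priority order.
        for stype in DEFAULT_DISTRIBUTION:
            counts[stype] = 1 if remaining > 0 else 0
            remaining -= counts[stype]
    else:
        for i, (stype, ratio) in enumerate(DEFAULT_DISTRIBUTION.items()):
            if i == len(DEFAULT_DISTRIBUTION) - 1:
                counts[stype] = remaining  # last type absorbs rounding drift
            else:
                counts[stype] = round(total * ratio)
                remaining -= counts[stype]
    return counts

# total=10 -> {'positive': 4, 'negative': 3, 'edge_case': 2, 'irrelevant': 1}
# (round(3.5) == 4 and round(2.5) == 2 under Python's banker's rounding;
# "irrelevant" takes whatever remains, here 1)
print(calculate_type_distribution(10))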
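
The retry policy in _generate_batched is also worth calling out: at most one retry, keep whichever attempt lands closer to the requested count, then truncate any overshoot. A generic standalone sketch of that pattern (the generate callable here is a hypothetical stand-in, not a synkro API):

# Generic sketch of the "retry once, keep the closer result" pattern
# used in _generate_batched; generate() is a hypothetical stand-in.
from collections.abc import Awaitable, Callable

async def generate_exact_count(
    generate: Callable[[str], Awaitable[list]],
    prompt: str,
    total_count: int,
) -> list:
    items = await generate(prompt)
    if len(items) != total_count:
        retry_prompt = (
            prompt
            + f"\n\nIMPORTANT: You must generate EXACTLY {total_count} "
            + f"items. You previously generated {len(items)}."
        )
        retried = await generate(retry_prompt)
        # Keep whichever attempt is closer to the target count.
        if abs(len(retried) - total_count) < abs(len(items) - total_count):
            items = retried
    # Truncate overshoot; undershoot is accepted after the single retry.
    return items[:total_count]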
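
Finally, the end-to-end call pattern follows the class docstring: construct the generator, then await generate for one category or generate_for_categories to fan out across several categories in parallel. A hedged usage sketch, assuming policy_text, logic_map, and categories are produced by earlier pipeline stages (their constructors are not shown in this diff):

# Hedged usage sketch based on the class docstring; policy_text,
# logic_map, and categories are assumed to come from earlier pipeline
# stages (e.g. the Stage 1 logic extractor) and are not built here.
import asyncio

from synkro.generation.golden_scenarios import GoldenScenarioGenerator
from synkro.llm.client import LLM
from synkro.models import OpenAI

async def main(policy_text, logic_map, categories) -> None:
    generator = GoldenScenarioGenerator(llm=LLM(model=OpenAI.GPT_4O_MINI))
    scenarios, distribution = await generator.generate_for_categories(
        policy_text=policy_text,
        logic_map=logic_map,
        categories=categories,
    )
    # distribution maps scenario-type names to counts, e.g.
    # {'positive': 7, 'negative': 6, 'edge_case': 5, 'irrelevant': 2}
    print(f"{len(scenarios)} scenarios: {distribution}")

# asyncio.run(main(policy_text, logic_map, categories)) once the
# upstream artifacts are available.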