synkro-0.4.36-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.

Files changed (81)
  1. synkro/__init__.py +331 -0
  2. synkro/advanced.py +184 -0
  3. synkro/cli.py +156 -0
  4. synkro/core/__init__.py +7 -0
  5. synkro/core/checkpoint.py +250 -0
  6. synkro/core/dataset.py +432 -0
  7. synkro/core/policy.py +337 -0
  8. synkro/errors.py +178 -0
  9. synkro/examples/__init__.py +148 -0
  10. synkro/factory.py +291 -0
  11. synkro/formatters/__init__.py +18 -0
  12. synkro/formatters/chatml.py +121 -0
  13. synkro/formatters/langfuse.py +98 -0
  14. synkro/formatters/langsmith.py +98 -0
  15. synkro/formatters/qa.py +112 -0
  16. synkro/formatters/sft.py +90 -0
  17. synkro/formatters/tool_call.py +127 -0
  18. synkro/generation/__init__.py +9 -0
  19. synkro/generation/follow_ups.py +134 -0
  20. synkro/generation/generator.py +314 -0
  21. synkro/generation/golden_responses.py +269 -0
  22. synkro/generation/golden_scenarios.py +333 -0
  23. synkro/generation/golden_tool_responses.py +791 -0
  24. synkro/generation/logic_extractor.py +126 -0
  25. synkro/generation/multiturn_responses.py +177 -0
  26. synkro/generation/planner.py +131 -0
  27. synkro/generation/responses.py +189 -0
  28. synkro/generation/scenarios.py +90 -0
  29. synkro/generation/tool_responses.py +625 -0
  30. synkro/generation/tool_simulator.py +114 -0
  31. synkro/interactive/__init__.py +16 -0
  32. synkro/interactive/hitl_session.py +205 -0
  33. synkro/interactive/intent_classifier.py +94 -0
  34. synkro/interactive/logic_map_editor.py +176 -0
  35. synkro/interactive/rich_ui.py +459 -0
  36. synkro/interactive/scenario_editor.py +198 -0
  37. synkro/llm/__init__.py +7 -0
  38. synkro/llm/client.py +309 -0
  39. synkro/llm/rate_limits.py +99 -0
  40. synkro/models/__init__.py +50 -0
  41. synkro/models/anthropic.py +26 -0
  42. synkro/models/google.py +19 -0
  43. synkro/models/local.py +104 -0
  44. synkro/models/openai.py +31 -0
  45. synkro/modes/__init__.py +13 -0
  46. synkro/modes/config.py +66 -0
  47. synkro/modes/conversation.py +35 -0
  48. synkro/modes/tool_call.py +18 -0
  49. synkro/parsers.py +442 -0
  50. synkro/pipeline/__init__.py +20 -0
  51. synkro/pipeline/phases.py +592 -0
  52. synkro/pipeline/runner.py +769 -0
  53. synkro/pipelines.py +136 -0
  54. synkro/prompts/__init__.py +57 -0
  55. synkro/prompts/base.py +167 -0
  56. synkro/prompts/golden_templates.py +533 -0
  57. synkro/prompts/interactive_templates.py +198 -0
  58. synkro/prompts/multiturn_templates.py +156 -0
  59. synkro/prompts/templates.py +281 -0
  60. synkro/prompts/tool_templates.py +318 -0
  61. synkro/quality/__init__.py +14 -0
  62. synkro/quality/golden_refiner.py +163 -0
  63. synkro/quality/grader.py +153 -0
  64. synkro/quality/multiturn_grader.py +150 -0
  65. synkro/quality/refiner.py +137 -0
  66. synkro/quality/tool_grader.py +126 -0
  67. synkro/quality/tool_refiner.py +128 -0
  68. synkro/quality/verifier.py +228 -0
  69. synkro/reporting.py +464 -0
  70. synkro/schemas.py +521 -0
  71. synkro/types/__init__.py +43 -0
  72. synkro/types/core.py +153 -0
  73. synkro/types/dataset_type.py +33 -0
  74. synkro/types/logic_map.py +348 -0
  75. synkro/types/tool.py +94 -0
  76. synkro-0.4.36.data/data/examples/__init__.py +148 -0
  77. synkro-0.4.36.dist-info/METADATA +507 -0
  78. synkro-0.4.36.dist-info/RECORD +81 -0
  79. synkro-0.4.36.dist-info/WHEEL +4 -0
  80. synkro-0.4.36.dist-info/entry_points.txt +2 -0
  81. synkro-0.4.36.dist-info/licenses/LICENSE +21 -0

synkro/generation/golden_scenarios.py
@@ -0,0 +1,333 @@
+ """Golden Scenario Generator - The Adversary.
+
+ Generates typed scenarios (positive, negative, edge_case, irrelevant)
+ with explicit rule targeting. This is Stage 2 of the Golden Trace pipeline.
+ """
+
+ import asyncio
+ from typing import Literal
+
+ from synkro.llm.client import LLM
+ from synkro.models import Model, OpenAI
+ from synkro.schemas import GoldenScenariosArray
+ from synkro.types.core import Category
+ from synkro.types.logic_map import LogicMap, GoldenScenario, ScenarioType
+ from synkro.prompts.golden_templates import (
+     GOLDEN_SCENARIO_PROMPT,
+     GOLDEN_SCENARIO_BATCHED_PROMPT,
+     POSITIVE_SCENARIO_INSTRUCTIONS,
+     NEGATIVE_SCENARIO_INSTRUCTIONS,
+     EDGE_CASE_SCENARIO_INSTRUCTIONS,
+     IRRELEVANT_SCENARIO_INSTRUCTIONS,
+ )
+
+
+ # Default scenario type distribution
+ DEFAULT_DISTRIBUTION = {
+     ScenarioType.POSITIVE: 0.35,  # 35% happy path
+     ScenarioType.NEGATIVE: 0.30,  # 30% violations
+     ScenarioType.EDGE_CASE: 0.25,  # 25% edge cases
+     ScenarioType.IRRELEVANT: 0.10,  # 10% out of scope
+ }
+
+
+ TYPE_INSTRUCTIONS = {
+     ScenarioType.POSITIVE: POSITIVE_SCENARIO_INSTRUCTIONS,
+     ScenarioType.NEGATIVE: NEGATIVE_SCENARIO_INSTRUCTIONS,
+     ScenarioType.EDGE_CASE: EDGE_CASE_SCENARIO_INSTRUCTIONS,
+     ScenarioType.IRRELEVANT: IRRELEVANT_SCENARIO_INSTRUCTIONS,
+ }
+
+
+ class GoldenScenarioGenerator:
+     """
+     The Adversary - Generates typed scenarios with rule targeting.
+
+     Produces scenarios across four types:
+     - POSITIVE (35%): Happy path, all criteria met
+     - NEGATIVE (30%): Violation, exactly one criterion fails
+     - EDGE_CASE (25%): Boundary conditions, exact limits
+     - IRRELEVANT (10%): Outside policy scope
+
+     Each scenario includes:
+     - Target rule IDs it's designed to test
+     - Expected outcome based on the rules
+     - Scenario type for classification
+
+     Examples:
+         >>> generator = GoldenScenarioGenerator(llm=LLM(model=OpenAI.GPT_4O_MINI))
+         >>> scenarios = await generator.generate(
+         ...     policy_text="...",
+         ...     logic_map=logic_map,
+         ...     category=category,
+         ...     count=10,
+         ... )
+     """
+
+     def __init__(
+         self,
+         llm: LLM | None = None,
+         model: Model = OpenAI.GPT_4O_MINI,
+         distribution: dict[ScenarioType, float] | None = None,
+     ):
+         """
+         Initialize the Golden Scenario Generator.
+
+         Args:
+             llm: LLM client to use (creates one if not provided)
+             model: Model to use if creating LLM
+             distribution: Custom scenario type distribution (defaults to 35/30/25/10)
+         """
+         self.llm = llm or LLM(model=model, temperature=0.8)
+         self.distribution = distribution or DEFAULT_DISTRIBUTION
+
+     async def generate(
+         self,
+         policy_text: str,
+         logic_map: LogicMap,
+         category: Category,
+         count: int,
+     ) -> list[GoldenScenario]:
+         """
+         Generate scenarios for a category with balanced type distribution.
+
+         Uses batched generation (single LLM call per category) for efficiency.
+
+         Args:
+             policy_text: The policy document text
+             logic_map: The extracted Logic Map (DAG of rules)
+             category: The category to generate scenarios for
+             count: Total number of scenarios to generate
+
+         Returns:
+             List of GoldenScenarios with type distribution
+         """
+         # Calculate counts per type based on distribution
+         type_counts = self._calculate_type_distribution(count)
+
+         # Use batched generation (single call for all types)
+         return await self._generate_batched(
+             policy_text=policy_text,
+             logic_map=logic_map,
+             category=category,
+             type_counts=type_counts,
+         )
+
+     def _calculate_type_distribution(self, total: int) -> dict[ScenarioType, int]:
+         """Calculate how many scenarios of each type to generate."""
+         counts = {}
+         remaining = total
+
+         # For small counts, prioritize non-IRRELEVANT types
+         # IRRELEVANT should only appear when we have enough scenarios
+         priority_order = [
+             ScenarioType.POSITIVE,
+             ScenarioType.NEGATIVE,
+             ScenarioType.EDGE_CASE,
+             ScenarioType.IRRELEVANT,  # Last priority
+         ]
+
+         if total <= 3:
+             # For very small counts, assign one to each priority type until exhausted
+             for stype in priority_order:
+                 if remaining > 0:
+                     counts[stype] = 1
+                     remaining -= 1
+                 else:
+                     counts[stype] = 0
+         else:
+             # Normal distribution for larger counts
+             for i, (stype, ratio) in enumerate(self.distribution.items()):
+                 if i == len(self.distribution) - 1:
+                     # Last type gets remaining to ensure total is exact
+                     counts[stype] = remaining
+                 else:
+                     count = round(total * ratio)
+                     counts[stype] = count
+                     remaining -= count
+
+         return counts
+
+     async def _generate_batched(
+         self,
+         policy_text: str,
+         logic_map: LogicMap,
+         category: Category,
+         type_counts: dict[ScenarioType, int],
+     ) -> list[GoldenScenario]:
+         """
+         Generate all scenario types in a single LLM call (batched).
+
+         This is more efficient than making separate calls per type.
+         Includes retry logic if the LLM doesn't return the exact count.
+         """
+         # Format Logic Map for prompt
+         logic_map_str = self._format_logic_map(logic_map)
+
+         # Calculate total
+         total_count = sum(type_counts.values())
+
+         # Build batched prompt
+         prompt = GOLDEN_SCENARIO_BATCHED_PROMPT.format(
+             policy_text=policy_text,
+             logic_map=logic_map_str,
+             category=category.name,
+             positive_count=type_counts.get(ScenarioType.POSITIVE, 0),
+             negative_count=type_counts.get(ScenarioType.NEGATIVE, 0),
+             edge_case_count=type_counts.get(ScenarioType.EDGE_CASE, 0),
+             irrelevant_count=type_counts.get(ScenarioType.IRRELEVANT, 0),
+             total_count=total_count,
+         )
+
+         # Generate with retry (max 1 retry if count is wrong)
+         scenarios = await self._generate_and_parse(prompt, category.name, total_count)
+
+         # Retry once if count doesn't match
+         if len(scenarios) != total_count:
+             retry_prompt = prompt + f"\n\nIMPORTANT: You must generate EXACTLY {total_count} scenarios. You previously generated {len(scenarios)}."
+             retry_scenarios = await self._generate_and_parse(retry_prompt, category.name, total_count)
+
+             # Use retry result if it's closer to target, otherwise keep original
+             if abs(len(retry_scenarios) - total_count) < abs(len(scenarios) - total_count):
+                 scenarios = retry_scenarios
+
+         # Truncate if over, accept if under (after retry)
+         return scenarios[:total_count]
+
+     async def _generate_and_parse(
+         self,
+         prompt: str,
+         category_name: str,
+         expected_count: int,
+     ) -> list[GoldenScenario]:
+         """Generate scenarios and parse to domain models."""
+         result = await self.llm.generate_structured(prompt, GoldenScenariosArray)
+
+         scenarios = []
+         for s in result.scenarios:
+             scenario = GoldenScenario(
+                 description=s.description,
+                 context=s.context,
+                 category=category_name,
+                 scenario_type=ScenarioType(s.scenario_type),
+                 target_rule_ids=s.target_rule_ids,
+                 expected_outcome=s.expected_outcome,
+             )
+             scenarios.append(scenario)
+
+         return scenarios
+
+     async def _generate_type(
+         self,
+         policy_text: str,
+         logic_map: LogicMap,
+         category: Category,
+         scenario_type: ScenarioType,
+         count: int,
+     ) -> list[GoldenScenario]:
+         """Generate scenarios of a specific type."""
+         # Get type-specific instructions
+         type_instructions = TYPE_INSTRUCTIONS[scenario_type]
+
+         # Format Logic Map for prompt
+         logic_map_str = self._format_logic_map(logic_map)
+
+         # Build prompt
+         prompt = GOLDEN_SCENARIO_PROMPT.format(
+             scenario_type=scenario_type.value.upper(),
+             policy_text=policy_text,
+             logic_map=logic_map_str,
+             category=category.name,
+             count=count,
+             type_specific_instructions=type_instructions,
+         )
+
+         # Generate structured output
+         result = await self.llm.generate_structured(prompt, GoldenScenariosArray)
+
+         # Convert to domain models
+         scenarios = []
+         for s in result.scenarios:
+             scenario = GoldenScenario(
+                 description=s.description,
+                 context=s.context,
+                 category=category.name,
+                 scenario_type=ScenarioType(s.scenario_type),
+                 target_rule_ids=s.target_rule_ids,
+                 expected_outcome=s.expected_outcome,
+             )
+             scenarios.append(scenario)
+
+         # Enforce requested count (LLM may return more or fewer)
+         return scenarios[:count]
+
+     def _format_logic_map(self, logic_map: LogicMap) -> str:
+         """Format Logic Map for prompt inclusion."""
+         lines = []
+         lines.append("RULES:")
+         for rule in logic_map.rules:
+             deps = f" (depends on: {', '.join(rule.dependencies)})" if rule.dependencies else ""
+             lines.append(
+                 f" {rule.rule_id} [{rule.category.value}]: {rule.text}{deps}"
+             )
+
+         lines.append("\nROOT RULES (Entry Points):")
+         lines.append(f" {', '.join(logic_map.root_rules)}")
+
+         return "\n".join(lines)
+
+     async def generate_for_categories(
+         self,
+         policy_text: str,
+         logic_map: LogicMap,
+         categories: list[Category],
+     ) -> tuple[list[GoldenScenario], dict[str, int]]:
+         """
+         Generate scenarios for multiple categories with distribution tracking.
+
+         Args:
+             policy_text: The policy document text
+             logic_map: The extracted Logic Map
+             categories: List of categories with counts
+
+         Returns:
+             Tuple of (all scenarios, type distribution counts)
+         """
+         # Generate for each category in parallel
+         tasks = [
+             self.generate(policy_text, logic_map, cat, cat.count)
+             for cat in categories
+         ]
+         results = await asyncio.gather(*tasks)
+
+         # Flatten scenarios
+         all_scenarios = []
+         for batch in results:
+             all_scenarios.extend(batch)
+
+         # Calculate distribution
+         distribution = {
+             ScenarioType.POSITIVE.value: 0,
+             ScenarioType.NEGATIVE.value: 0,
+             ScenarioType.EDGE_CASE.value: 0,
+             ScenarioType.IRRELEVANT.value: 0,
+         }
+         for s in all_scenarios:
+             distribution[s.scenario_type.value] += 1
+
+         return all_scenarios, distribution
+
+     def get_distribution_summary(self, scenarios: list[GoldenScenario]) -> dict[str, int]:
+         """Get a summary of scenario type distribution."""
+         distribution = {
+             "positive": 0,
+             "negative": 0,
+             "edge_case": 0,
+             "irrelevant": 0,
+         }
+         for s in scenarios:
+             distribution[s.scenario_type.value] += 1
+         return distribution
+
+
+ __all__ = ["GoldenScenarioGenerator", "DEFAULT_DISTRIBUTION"]
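
For reviewers who want to exercise this module, here is a minimal usage sketch, assuming the synkro 0.4.36 API exactly as it appears in the diff above. The wrapper function build_scenarios is hypothetical, and its policy_text, logic_map, and category inputs are placeholders for objects that would normally come from the earlier pipeline stages listed above (synkro/generation/logic_extractor.py and synkro/generation/planner.py).

import asyncio

from synkro.generation.golden_scenarios import GoldenScenarioGenerator
from synkro.llm.client import LLM
from synkro.models import OpenAI
from synkro.types.core import Category
from synkro.types.logic_map import GoldenScenario, LogicMap


async def build_scenarios(
    policy_text: str,
    logic_map: LogicMap,
    category: Category,
) -> list[GoldenScenario]:
    """Hypothetical wrapper; inputs come from earlier pipeline stages."""
    generator = GoldenScenarioGenerator(llm=LLM(model=OpenAI.GPT_4O_MINI))

    # With the default 35/30/25/10 distribution, count=10 resolves to
    # 4 positive / 3 negative / 2 edge_case / 1 irrelevant: the last
    # type absorbs the rounding remainder in _calculate_type_distribution().
    scenarios = await generator.generate(
        policy_text=policy_text,
        logic_map=logic_map,
        category=category,
        count=10,
    )
    print(generator.get_distribution_summary(scenarios))
    return scenarios

For multi-category runs, generate_for_categories applies the same per-category call concurrently via asyncio.gather and returns the aggregate type distribution alongside the scenarios.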