synkro-0.4.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. synkro/__init__.py +179 -0
  2. synkro/advanced.py +186 -0
  3. synkro/cli.py +128 -0
  4. synkro/core/__init__.py +7 -0
  5. synkro/core/checkpoint.py +250 -0
  6. synkro/core/dataset.py +402 -0
  7. synkro/core/policy.py +337 -0
  8. synkro/errors.py +178 -0
  9. synkro/examples/__init__.py +148 -0
  10. synkro/factory.py +276 -0
  11. synkro/formatters/__init__.py +12 -0
  12. synkro/formatters/qa.py +98 -0
  13. synkro/formatters/sft.py +90 -0
  14. synkro/formatters/tool_call.py +127 -0
  15. synkro/generation/__init__.py +9 -0
  16. synkro/generation/follow_ups.py +134 -0
  17. synkro/generation/generator.py +220 -0
  18. synkro/generation/golden_responses.py +244 -0
  19. synkro/generation/golden_scenarios.py +276 -0
  20. synkro/generation/golden_tool_responses.py +416 -0
  21. synkro/generation/logic_extractor.py +126 -0
  22. synkro/generation/multiturn_responses.py +177 -0
  23. synkro/generation/planner.py +131 -0
  24. synkro/generation/responses.py +189 -0
  25. synkro/generation/scenarios.py +90 -0
  26. synkro/generation/tool_responses.py +376 -0
  27. synkro/generation/tool_simulator.py +114 -0
  28. synkro/interactive/__init__.py +12 -0
  29. synkro/interactive/hitl_session.py +77 -0
  30. synkro/interactive/logic_map_editor.py +173 -0
  31. synkro/interactive/rich_ui.py +205 -0
  32. synkro/llm/__init__.py +7 -0
  33. synkro/llm/client.py +235 -0
  34. synkro/llm/rate_limits.py +95 -0
  35. synkro/models/__init__.py +43 -0
  36. synkro/models/anthropic.py +26 -0
  37. synkro/models/google.py +19 -0
  38. synkro/models/openai.py +31 -0
  39. synkro/modes/__init__.py +15 -0
  40. synkro/modes/config.py +66 -0
  41. synkro/modes/qa.py +18 -0
  42. synkro/modes/sft.py +18 -0
  43. synkro/modes/tool_call.py +18 -0
  44. synkro/parsers.py +442 -0
  45. synkro/pipeline/__init__.py +20 -0
  46. synkro/pipeline/phases.py +592 -0
  47. synkro/pipeline/runner.py +424 -0
  48. synkro/pipelines.py +123 -0
  49. synkro/prompts/__init__.py +57 -0
  50. synkro/prompts/base.py +167 -0
  51. synkro/prompts/golden_templates.py +474 -0
  52. synkro/prompts/interactive_templates.py +65 -0
  53. synkro/prompts/multiturn_templates.py +156 -0
  54. synkro/prompts/qa_templates.py +97 -0
  55. synkro/prompts/templates.py +281 -0
  56. synkro/prompts/tool_templates.py +201 -0
  57. synkro/quality/__init__.py +14 -0
  58. synkro/quality/golden_refiner.py +163 -0
  59. synkro/quality/grader.py +153 -0
  60. synkro/quality/multiturn_grader.py +150 -0
  61. synkro/quality/refiner.py +137 -0
  62. synkro/quality/tool_grader.py +126 -0
  63. synkro/quality/tool_refiner.py +128 -0
  64. synkro/quality/verifier.py +228 -0
  65. synkro/reporting.py +537 -0
  66. synkro/schemas.py +472 -0
  67. synkro/types/__init__.py +41 -0
  68. synkro/types/core.py +126 -0
  69. synkro/types/dataset_type.py +30 -0
  70. synkro/types/logic_map.py +345 -0
  71. synkro/types/tool.py +94 -0
  72. synkro-0.4.12.data/data/examples/__init__.py +148 -0
  73. synkro-0.4.12.dist-info/METADATA +258 -0
  74. synkro-0.4.12.dist-info/RECORD +77 -0
  75. synkro-0.4.12.dist-info/WHEEL +4 -0
  76. synkro-0.4.12.dist-info/entry_points.txt +2 -0
  77. synkro-0.4.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,65 @@
+ """Prompt templates for interactive Logic Map editing."""
+
+ LOGIC_MAP_REFINEMENT_PROMPT = """You are a Logic Map editor. Your task is to modify a Logic Map based on user feedback.
+
+ CURRENT LOGIC MAP:
+ {current_logic_map}
+
+ ORIGINAL POLICY (for reference):
+ {policy_text}
+
+ USER FEEDBACK:
+ {user_feedback}
+
+ INSTRUCTIONS:
+ Interpret the user's natural language request and modify the Logic Map accordingly.
+
+ SUPPORTED OPERATIONS:
+
+ 1. **ADD**: Create a new rule
+ - User might say: "add a rule for...", "include a rule about...", "there should be a rule for..."
+ - Generate a new unique rule_id (use the next available number, e.g., if R008 exists, use R009)
+ - Extract condition, action, and dependencies from context
+ - Determine category based on rule type (CONSTRAINT, PERMISSION, PROCEDURE, EXCEPTION)
+
+ 2. **REMOVE**: Delete a rule
+ - User might say: "remove R005", "delete the rule about...", "R003 is not needed"
+ - Remove the specified rule
+ - Update dependencies in other rules that referenced the removed rule
+ - Update root_rules if the removed rule was a root
+
+ 3. **MERGE**: Combine two or more rules
+ - User might say: "merge R002 and R003", "combine these rules into one"
+ - Create a new rule that captures both conditions/actions
+ - Remove the original rules
+ - Update all dependencies that referenced the merged rules
+
+ 4. **MODIFY**: Change an existing rule
+ - User might say: "change R001 to...", "the condition for R002 should be...", "update R003's text"
+ - Update the specified fields (text, condition, action, category)
+ - Preserve rule_id and update dependencies if needed
+
+ 5. **SPLIT**: Divide a rule into multiple rules
+ - User might say: "split R001 into separate rules for X and Y"
+ - Create new rules with sequential IDs
+ - Remove original rule and update dependencies
+
+ 6. **REORDER DEPENDENCIES**: Change rule relationships
+ - User might say: "R003 should depend on R001", "remove dependency on R002 from R004"
+ - Update the dependencies arrays accordingly
+ - Ensure no circular dependencies are created
+
+ CRITICAL REQUIREMENTS:
+ - Maintain valid DAG structure (no circular dependencies)
+ - Ensure all rule_ids are unique
+ - Update root_rules list when dependencies change (root rules have no dependencies)
+ - Preserve existing rules that aren't affected by the change
+ - If the user's request is unclear, make a reasonable interpretation based on context
+
+ OUTPUT:
+ Return the complete updated Logic Map with ALL rules (both modified and unmodified).
+ Provide a brief changes_summary explaining what was done.
+ Provide reasoning explaining how you interpreted the user's feedback."""
+
+
+ __all__ = ["LOGIC_MAP_REFINEMENT_PROMPT"]
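The CRITICAL REQUIREMENTS above leave DAG validation to the caller. A minimal sketch of how a caller might render this template and check the no-cycles rule, assuming plain str.format semantics; render_refinement_prompt and is_acyclic are illustrative helpers, not synkro APIs:

    # Illustrative only: render the template, then validate the edited map.
    from synkro.prompts.interactive_templates import LOGIC_MAP_REFINEMENT_PROMPT

    def render_refinement_prompt(current_logic_map: str, policy_text: str, user_feedback: str) -> str:
        # str.format fills the three placeholders the template declares.
        return LOGIC_MAP_REFINEMENT_PROMPT.format(
            current_logic_map=current_logic_map,
            policy_text=policy_text,
            user_feedback=user_feedback,
        )

    def is_acyclic(dependencies: dict[str, list[str]]) -> bool:
        # Iterative DFS with three-color marking; a GRAY -> GRAY edge is a cycle.
        WHITE, GRAY, BLACK = 0, 1, 2
        color = {rule_id: WHITE for rule_id in dependencies}
        for start in dependencies:
            if color[start] != WHITE:
                continue
            color[start] = GRAY
            stack = [(start, iter(dependencies[start]))]
            while stack:
                node, deps = stack[-1]
                dep = next(deps, None)
                if dep is None:
                    color[node] = BLACK
                    stack.pop()
                elif color.get(dep, WHITE) == GRAY:
                    return False
                elif color.get(dep, WHITE) == WHITE:
                    color[dep] = GRAY
                    stack.append((dep, iter(dependencies.get(dep, []))))
        return True

A rule with an empty dependencies list is a root, matching the root_rules convention the prompt describes.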
@@ -0,0 +1,156 @@
+ """Multi-turn conversation prompt templates for dataset generation."""
+
+ # =============================================================================
+ # FOLLOW-UP QUESTION GENERATION
+ # =============================================================================
+
+ FOLLOW_UP_GENERATION_PROMPT = """You are generating a follow-up question for a multi-turn policy conversation.
+
+ Generate a {question_type} follow-up question based on the conversation so far.
+
+ QUESTION TYPES:
+ - **clarification**: Ask for more details about an ambiguous point in the previous response
+ - **edge_case**: Probe a boundary condition or unusual scenario related to the policy
+ - **what_if**: Explore a hypothetical variation ("What if X changes?")
+ - **specificity**: Drill into specific implementation details or examples
+ - **challenge**: Question the reasoning or ask for justification of a recommendation
+
+ CONVERSATION SO FAR:
+ {conversation}
+
+ POLICY:
+ {policy}
+
+ Generate a follow-up that:
+ 1. Builds naturally on the conversation context
+ 2. Tests deeper understanding of the policy
+ 3. Is realistic - something a user would actually ask
+ 4. Matches the specified question type
+ 5. Is specific enough to require a substantive response
+
+ Respond with ONLY the follow-up question text."""
+
+ # =============================================================================
+ # MULTI-TURN RESPONSE GENERATION
+ # =============================================================================
+
+ MULTI_TURN_RESPONSE_PROMPT = """You are a domain expert continuing a multi-turn policy conversation.
+
+ CONVERSATION HISTORY:
+ {conversation}
+
+ LATEST QUESTION:
+ {question}
+
+ POLICY:
+ {policy}
+
+ Provide a response that:
+ 1. Directly addresses the latest question
+ 2. Maintains consistency with your previous responses
+ 3. Cites specific policy sections that apply
+ 4. Builds on the established context
+ 5. Uses <reasoning> tags to show your thought process
+ 6. Gives specific, actionable recommendations
+
+ Your response should acknowledge what was discussed before and add new insights.
+ Keep the response appropriately concise for a conversational turn."""
+
+ MULTI_TURN_INITIAL_PROMPT = """You are a domain expert starting a multi-turn conversation.
+
+ This conversation will have {target_turns} turns. Start with a response that:
+ 1. Addresses the initial question thoroughly
+ 2. Uses <reasoning> tags to show your thought process
+ 3. Cites specific policy sections
+ 4. Leaves room for natural follow-up questions
+ 5. Gives specific, actionable initial guidance
+
+ SCENARIO:
+ {scenario}
+
+ CONTEXT:
+ {context}
+
+ POLICY:
+ {policy}
+
+ Respond as the assistant. Your response should be comprehensive but leave room for the user to ask follow-up questions that will deepen the discussion."""
+
+ # =============================================================================
+ # MULTI-TURN GRADING
+ # =============================================================================
+
+ MULTI_TURN_GRADE_PROMPT = """You are a strict evaluator grading a multi-turn policy conversation.
+
+ CONVERSATION:
+ {conversation}
+
+ POLICY:
+ {policy}
+
+ Evaluate EACH assistant turn AND the overall conversation.
+
+ For EACH assistant turn, check:
+ 1. **Policy Compliant** - Recommendations follow the policy exactly
+ 2. **Properly Cited** - Relevant policy sections are referenced
+ 3. **Complete Reasoning** - Logic is sound with no gaps
+ 4. **Actionable** - Recommendations are specific, not vague
+
+ For the OVERALL conversation, check:
+ 1. **Coherence** - No contradictions across turns
+ 2. **Progressive Depth** - Each turn appropriately builds on context
+ 3. **Consistency** - Recommendations don't conflict with earlier statements
+
+ The conversation PASSES only if:
+ - ALL individual turns pass their criteria
+ - The overall coherence and consistency checks pass
+
+ Respond with a structured evaluation for each turn and overall assessment."""
+
+ TURN_GRADE_FORMAT = """{{
+ "turn_index": {turn_index},
+ "pass": <true/false>,
+ "policy_violations": ["<violation>", ...],
+ "missing_citations": ["<missing>", ...],
+ "incomplete_reasoning": ["<gap>", ...],
+ "vague_recommendations": ["<vague>", ...],
+ "feedback": "<specific feedback for this turn>"
+ }}"""
+
+ CONVERSATION_GRADE_FORMAT = """{{
+ "index": {index},
+ "overall_pass": <true/false>,
+ "turn_grades": [<array of turn grades>],
+ "coherence_pass": <true/false>,
+ "coherence_issues": ["<contradiction or incoherence>", ...],
+ "progressive_depth": <true/false>,
+ "overall_feedback": "<summary of what needs fixing across the conversation>"
+ }}"""
+
+ # =============================================================================
+ # MULTI-TURN REFINEMENT
+ # =============================================================================
+
+ MULTI_TURN_REFINE_PROMPT = """You are improving a multi-turn conversation based on grader feedback.
+
+ ORIGINAL CONVERSATION:
+ {conversation}
+
+ POLICY:
+ {policy}
+
+ GRADING FEEDBACK:
+ {feedback}
+
+ Fix ALL issues while maintaining conversation coherence:
+ 1. Address every policy violation in each turn
+ 2. Add missing citations where indicated
+ 3. Fill reasoning gaps with step-by-step logic
+ 4. Make vague recommendations specific and actionable
+ 5. Fix any coherence issues between turns
+ 6. Ensure progressive depth in the conversation
+
+ IMPORTANT: Maintain the same conversation structure (same number of turns, same topics).
+ Only improve the CONTENT of the assistant responses.
+
+ Output the improved conversation with all turns."""
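One detail worth noting: TURN_GRADE_FORMAT and CONVERSATION_GRADE_FORMAT are themselves str.format templates, so the doubled braces escape to literal JSON braces while the single-braced fields are substituted. A quick illustrative check (not package code):

    from synkro.prompts.multiturn_templates import TURN_GRADE_FORMAT

    schema = TURN_GRADE_FORMAT.format(turn_index=2)
    assert schema.startswith("{")           # "{{" renders as a literal "{"
    assert '"turn_index": 2,' in schema     # the single-braced field is filled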
@@ -0,0 +1,97 @@
+ """QA-specific prompt templates for question-answer pair generation."""
+
+ QA_SCENARIO_PROMPT = """You are an expert at creating factual questions from documents.
+
+ Given a document, generate diverse questions that can be answered directly from the content.
+
+ Types of questions to generate:
+ 1. **Factual** - Who, what, when, where questions with direct answers
+ 2. **Definitional** - "What is..." or "Define..." questions
+ 3. **Procedural** - "How do you..." or "What are the steps..."
+ 4. **Comparative** - Questions comparing concepts within the document
+ 5. **Inferential** - Questions requiring light reasoning from stated facts
+
+ Make each question:
+ - Answerable from the document (no external knowledge needed)
+ - Specific and unambiguous
+ - Varied in complexity and type
+ - Natural - how a real person would ask
+
+ Focus on creating questions that test comprehension of the document content."""
+
+ QA_RESPONSE_PROMPT = """You are answering questions using ONLY information from the provided document.
+
+ Rules:
+ 1. Answer ONLY using facts stated in the document
+ 2. Quote or paraphrase the relevant section
+ 3. If the answer isn't in the document, say "Not found in document"
+ 4. Keep answers concise but complete
+ 5. Include the source section/paragraph when possible
+
+ Your response must be a JSON object:
+ {{
+ "question": "<the question being answered>",
+ "answer": "<your answer using document facts>",
+ "context": "<the relevant passage from the document>"
+ }}
+
+ DOCUMENT:
+ {policy}
+
+ QUESTION:
+ {scenario}
+
+ Respond with ONLY the JSON object."""
+
+ QA_GRADE_PROMPT = """You are grading a question-answer pair for quality.
+
+ A QA pair PASSES only if ALL are true:
+ 1. **Factually Correct** - Answer is accurate based on the document
+ 2. **Properly Sourced** - Context contains the relevant passage
+ 3. **Complete** - Answer fully addresses the question
+ 4. **Concise** - No unnecessary information or padding
+ 5. **Grounded** - No information made up beyond the document
+
+ DOCUMENT:
+ {policy}
+
+ QUESTION:
+ {scenario}
+
+ ANSWER TO GRADE:
+ {response}
+
+ Respond with ONLY a JSON object:
+ {{
+ "pass": <true/false>,
+ "factual_errors": ["<error 1>", ...],
+ "missing_info": ["<missing 1>", ...],
+ "source_issues": ["<issue 1>", ...],
+ "feedback": "<summary of issues or 'Correct'>"
+ }}"""
+
+ QA_REFINE_PROMPT = """You are improving a question-answer pair based on feedback.
+
+ Fix all issues while maintaining accuracy to the source document.
+
+ DOCUMENT:
+ {policy}
+
+ QUESTION:
+ {scenario}
+
+ ORIGINAL ANSWER:
+ {response}
+
+ ISSUES TO FIX:
+ {feedback}
+
+ Generate an IMPROVED answer. Output a JSON object:
+ {{
+ "question": "<the question>",
+ "answer": "<your IMPROVED answer>",
+ "context": "<the relevant passage from the document>"
+ }}
+
+ Respond with ONLY the JSON object."""
+
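QA_RESPONSE_PROMPT names its slots {policy} and {scenario}, so a caller maps document text and question onto those names and then parses the JSON reply. A minimal sketch under those assumptions; build_qa_prompt and parse_qa_reply are illustrative, not synkro APIs:

    import json
    from synkro.prompts.qa_templates import QA_RESPONSE_PROMPT

    def build_qa_prompt(document: str, question: str) -> str:
        # The template's slot names are {policy} and {scenario}.
        return QA_RESPONSE_PROMPT.format(policy=document, scenario=question)

    def parse_qa_reply(raw: str) -> dict:
        # Slice to the outermost braces in case the model wrapped the JSON anyway.
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end == -1:
            raise ValueError("no JSON object in reply")
        pair = json.loads(raw[start:end + 1])
        missing = {"question", "answer", "context"} - pair.keys()
        if missing:
            raise ValueError(f"QA reply missing keys: {sorted(missing)}")
        return pair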
@@ -0,0 +1,281 @@
+ """Universal prompt templates for dataset generation across ANY domain."""
+
+ # =============================================================================
+ # POLICY ANALYSIS PROMPTS
+ # =============================================================================
+
+ POLICY_COMPLEXITY_PROMPT = """You are an expert at analyzing policy documents to determine their complexity.
+
+ Analyze the given policy and determine the optimal number of conversation turns needed to properly test understanding.
+
+ Guidelines:
+ - **Simple (1-2 turns)**: Policy has 1 clear variable/rule. Single query → Straight answer.
+ Example: "All data must be encrypted" - just one rule to check.
+
+ - **Conditional (3 turns)**: Policy has 2-3 variables/conditions. Query → Clarification → Verdict.
+ Example: "Data can be shared IF consent is given AND purpose is specified" - needs clarification.
+
+ - **Complex (5+ turns)**: Policy has 4+ nested variables, exceptions, or conditions.
+ Multiple rounds of validation before final sign-off.
+ Example: "Data retention varies by type, region, consent status, and business need" - needs deep exploration.
+
+ Count the following as "variables":
+ - Distinct rules or requirements
+ - Conditional branches (if/then/else)
+ - Exceptions to rules
+ - Categories or types that affect decisions
+ - Time-based conditions
+ - Role-based permissions
+
+ Respond with your analysis."""
+
+ POLICY_PLANNING_PROMPT = """You are an expert at creating training data plans for AI models across ANY domain.
+
+ Given a task description, policy, or domain specification and a target number of traces, analyze the content and create an optimal plan for generating training data.
+
+ Your task:
+ 1. Deeply analyze the domain/task to understand its core concepts, rules, processes, and challenges
+ 2. Identify distinct SCENARIO CATEGORIES that test different aspects of the domain
+ 3. Distribute the target traces across categories based on complexity and importance
+ 4. Ensure coverage of: clear violations/errors, edge cases, happy paths, real-world constraints, and domain-specific challenges
+
+ Guidelines for dynamic category creation:
+ - **Analyze the domain deeply**: Understand the core rules, processes, stakeholders, and common challenges
+ - **Create domain-specific categories**: Base categories on the actual content, not generic assumptions
+ - **Balance complexity**: Allocate based on domain complexity (simple domains: 60% happy paths, complex domains: 40% edge cases)
+ - **Ensure comprehensive coverage**: Every major aspect of the domain should be tested
+ - **Consider domain-specific challenges**: Time pressure in trading, regulatory changes in finance, technical failures in engineering, etc.
+
+ For each category, provide:
+ - name: Short descriptive name specific to the domain
+ - description: What this category tests, including specific domain concepts and challenges
+ - traces: Number of traces to generate (must sum to target)
+
+ Provide detailed reasoning explaining:
+ 1. Your analysis of the domain's core concepts and challenges
+ 2. Why you chose these specific categories for this domain
+ 3. How the category distribution reflects the domain's complexity and real-world usage patterns"""
+
+ # =============================================================================
+ # SCENARIO GENERATION PROMPTS
+ # =============================================================================
+
+ SCENARIO_GENERATOR_PROMPT = """You are an expert at creating realistic scenarios for ANY domain or task.
+
+ Given a task description, policy, or domain specification, first deeply analyze the domain to understand:
+ - Core concepts, rules, and processes
+ - Key stakeholders and their roles
+ - Common challenges and failure modes
+ - Domain-specific terminology and workflows
+
+ Then generate diverse scenarios that thoroughly test understanding of the domain:
+
+ 1. **Clear Success/Failure Cases** - Obvious correct/incorrect applications of domain rules
+ 2. **Edge Cases** - Ambiguous situations with multiple valid interpretations
+ 3. **Multi-Step Processes** - Complex scenarios requiring sequential reasoning
+ 4. **Real-World Constraints** - Practical limitations like time pressure, incomplete info, resource constraints
+ 5. **Domain-Specific Challenges** - Scenarios that test unique aspects of this particular domain
+ 6. **Stakeholder Interactions** - Situations involving coordination between different parties
+ 7. **Exception Handling** - Scenarios requiring deviation from standard processes
+
+ Make each scenario:
+ - Deeply grounded in the specific domain's concepts and terminology
+ - Realistic and challenging for someone working in that domain
+ - Specific with concrete details that reflect actual domain practices
+ - Varied in complexity and stakeholder perspectives
+ - Designed to reveal both expert and novice understanding gaps
+
+ Focus on creating "golden traces" - perfect examples that demonstrate deep domain mastery."""
+
+ CATEGORY_SCENARIO_PROMPT = """You are an expert at creating realistic scenarios for ANY domain or task.
+
+ Generate scenarios specifically for the following CATEGORY within the given domain:
+ **Category Name**: {CATEGORY_NAME}
+ **Category Description**: {CATEGORY_DESCRIPTION}
+
+ First, deeply understand:
+ - How this category fits into the broader domain
+ - What specific skills or knowledge this category tests
+ - The real-world contexts where this category applies
+ - Common mistakes or misconceptions in this category
+
+ All generated scenarios MUST:
+ - Perfectly fit this specific category's focus and objectives
+ - Demonstrate deep understanding of the category's role in the domain
+ - Test the exact skills and knowledge described in the category
+ - Be realistic and occur in actual domain practice
+
+ Make each scenario:
+ - Highly specific with concrete details that reflect domain expertise
+ - Challenging and nuanced - not simplistic examples
+ - Varied in stakeholder perspectives, contexts, and complexity levels
+ - Different from each other (no duplicates) - explore different facets of the category
+ - Include domain-specific terminology, processes, and challenges
+ - Designed as "golden traces" that showcase expert-level understanding
+
+ Focus on creating scenarios that would distinguish between novice and expert performance in this category."""
+
+ # =============================================================================
+ # SYSTEM PROMPT
+ # =============================================================================
+
+ SYSTEM_PROMPT = """You are a domain expert. When given a scenario and context, provide comprehensive, expert-level guidance.
+
+ IMPORTANT: Always show your reasoning process using <reasoning> tags before giving your answer.
+
+ Your responses must:
+ - Start with <reasoning> tags showing step-by-step analysis
+ - Cite specific domain concepts, rules, or processes that apply
+ - Give specific, actionable recommendations grounded in domain best practices
+ - Address all aspects of the scenario from multiple stakeholder perspectives
+ - Acknowledge edge cases, exceptions, and potential complications
+ - Consider contemporary challenges and modern practices in the domain
+
+ Vary your response style while maintaining expertise:
+ - For concise responses: Direct, focused guidance with key domain principles
+ - For detailed responses: Comprehensive analysis with structured breakdowns and examples
+ - For practical responses: Step-by-step implementation guides and checklists
+ - For complex responses: Thorough exploration of trade-offs and alternative approaches
+
+ Always prioritize accuracy, clarity, and deep domain understanding in your guidance."""
+
+ # =============================================================================
+ # BATCHED PROMPTS (for batch generation)
+ # =============================================================================
+
+ BATCHED_RESPONSE_PROMPT = """You are generating training data for a domain expert model.
+
+ For EACH scenario below, create a complete training example in CHAT MESSAGES FORMAT.
+
+ Each training example must have exactly 3 messages:
+ 1. "system" - The system prompt defining the assistant's role
+ 2. "user" - The scenario and context as the user's question
+ 3. "assistant" - Your expert guidance response
+
+ The assistant response must:
+ - Cite specific policy sections that apply
+ - Explain reasoning step-by-step
+ - Give specific, actionable recommendations
+ - Address all aspects of the scenario
+ - Acknowledge edge cases and complications
+
+ Respond with a JSON array where each object has:
+ - "index": the scenario number (0-based)
+ - "messages": array of 3 message objects with "role" and "content" fields"""
+
+ BATCHED_GRADER_PROMPT = """You are a strict policy compliance evaluator. Your job is to determine if EACH response is FULLY CORRECT.
+
+ A response PASSES only if ALL of the following are true:
+ 1. **Policy Compliant** - Every recommendation follows the policy exactly. No violations.
+ 2. **Fully Supported** - Every claim is backed by a specific policy section. Nothing made up.
+ 3. **Properly Cited** - All relevant policy sections are explicitly referenced.
+ 4. **Complete Reasoning** - The chain of thought is complete with no gaps or skipped steps.
+ 5. **Actionable & Specific** - All recommendations are concrete and implementable, not vague.
+
+ If ANY of these fail, the response does NOT pass. Be strict - only mark "pass": true for perfect responses.
+
+ For each response, provide structured feedback:
+ - "policy_violations": List any rules misinterpreted or violated
+ - "missing_citations": List policy sections that should have been cited
+ - "incomplete_reasoning": List logical gaps or missing reasoning steps
+ - "vague_recommendations": List recommendations that need to be more specific
+ - "feedback": Summary of what needs to be fixed
+
+ Respond with a JSON array where each object has:
+ - "index": the scenario number (0-based)
+ - "pass": boolean (true ONLY if response is fully correct)
+ - "policy_violations": array of violations
+ - "missing_citations": array of missing citations
+ - "incomplete_reasoning": array of reasoning gaps
+ - "vague_recommendations": array of vague items
+ - "feedback": summary of how to fix"""
+
+ BATCHED_REFINER_PROMPT = """You are improving training data for a domain expert model based on grader feedback.
+
+ For EACH scenario with feedback below, fix ALL issues while keeping what was correct.
+
+ You will receive structured feedback with:
+ - policy_violations: Rules you violated or misinterpreted - FIX THESE
+ - missing_citations: Policy sections you should cite - ADD THESE
+ - incomplete_reasoning: Gaps in your logic - FILL THESE IN
+ - vague_recommendations: Things that need to be more specific - MAKE CONCRETE
+
+ Requirements:
+ 1. Fix every policy violation - ensure recommendations follow the policy exactly
+ 2. Add citations for every missing policy section mentioned
+ 3. Complete any incomplete reasoning chains with step-by-step logic
+ 4. Replace vague language with specific, actionable recommendations
+ 5. Keep the parts that were already correct
+
+ Output in CHAT MESSAGES FORMAT with exactly 3 messages:
+ 1. "system" - The system prompt defining the assistant's role
+ 2. "user" - The scenario and context as the user's question
+ 3. "assistant" - Your IMPROVED guidance
+
+ Respond with a JSON array where each object has:
+ - "index": the scenario number (0-based)
+ - "messages": array of 3 message objects with "role" and "content" fields"""
+
+ # =============================================================================
+ # SINGLE PROMPTS (for parallel high-concurrency generation)
+ # =============================================================================
+
+ SINGLE_RESPONSE_PROMPT = """You are a domain expert generating a training example.
+
+ Given the scenario and policy below, create a complete training example.
+
+ Your response must be a JSON object with exactly 3 messages:
+ {{
+ "messages": [
+ {{"role": "system", "content": "<system prompt defining expert role>"}},
+ {{"role": "user", "content": "<the scenario as a user question>"}},
+ {{"role": "assistant", "content": "<your expert response>"}}
+ ]
+ }}
+
+ The assistant response must:
+ - Start with <reasoning> tags showing your thought process
+ - Cite specific policy sections that apply
+ - Give specific, actionable recommendations
+ - Address all aspects of the scenario
+ - Acknowledge edge cases and complications
+
+ SCENARIO:
+ {scenario}
+
+ CONTEXT:
+ {context}
+
+ POLICY:
+ {policy}
+
+ Respond with ONLY the JSON object, no additional text."""
+
+ SINGLE_GRADE_PROMPT = """You are a strict evaluator. Grade this response.
+
+ A response PASSES only if ALL are true:
+ 1. Policy Compliant - Every recommendation follows the policy exactly
+ 2. Fully Supported - Every claim backed by specific policy section
+ 3. Properly Cited - All relevant policy sections referenced
+ 4. Complete Reasoning - Chain of thought has no gaps
+ 5. Actionable & Specific - Recommendations are concrete, not vague
+
+ SCENARIO:
+ {scenario}
+
+ POLICY:
+ {policy}
+
+ RESPONSE TO GRADE:
+ {response}
+
+ Respond with ONLY a JSON object:
+ {{
+ "pass": <true/false>,
+ "policy_violations": ["<violation 1>", ...],
+ "missing_citations": ["<missing 1>", ...],
+ "incomplete_reasoning": ["<gap 1>", ...],
+ "vague_recommendations": ["<vague 1>", ...],
+ "feedback": "<summary of issues or 'Correct'>"
+ }}"""
+
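The single-prompt templates are built to pair up: SINGLE_RESPONSE_PROMPT yields a 3-message example whose assistant turn feeds SINGLE_GRADE_PROMPT. A rough sketch of that pairing (the real orchestration lives in synkro/pipeline/runner.py; llm here stands in for any prompt-in, text-out callable):

    import json
    from synkro.prompts.templates import SINGLE_RESPONSE_PROMPT, SINGLE_GRADE_PROMPT

    def generate_and_grade(llm, scenario: str, context: str, policy: str):
        gen_prompt = SINGLE_RESPONSE_PROMPT.format(
            scenario=scenario, context=context, policy=policy
        )
        example = json.loads(llm(gen_prompt))          # {"messages": [system, user, assistant]}

        grade_prompt = SINGLE_GRADE_PROMPT.format(
            scenario=scenario,
            policy=policy,
            response=example["messages"][2]["content"],  # the assistant turn
        )
        grade = json.loads(llm(grade_prompt))          # {"pass": ..., "feedback": ...}
        return example, grade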