synkro 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synkro/__init__.py +179 -0
- synkro/advanced.py +186 -0
- synkro/cli.py +128 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +402 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +276 -0
- synkro/formatters/__init__.py +12 -0
- synkro/formatters/qa.py +98 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +220 -0
- synkro/generation/golden_responses.py +244 -0
- synkro/generation/golden_scenarios.py +276 -0
- synkro/generation/golden_tool_responses.py +416 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +376 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +12 -0
- synkro/interactive/hitl_session.py +77 -0
- synkro/interactive/logic_map_editor.py +173 -0
- synkro/interactive/rich_ui.py +205 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +235 -0
- synkro/llm/rate_limits.py +95 -0
- synkro/models/__init__.py +43 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +15 -0
- synkro/modes/config.py +66 -0
- synkro/modes/qa.py +18 -0
- synkro/modes/sft.py +18 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +424 -0
- synkro/pipelines.py +123 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +474 -0
- synkro/prompts/interactive_templates.py +65 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/qa_templates.py +97 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +201 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +537 -0
- synkro/schemas.py +472 -0
- synkro/types/__init__.py +41 -0
- synkro/types/core.py +126 -0
- synkro/types/dataset_type.py +30 -0
- synkro/types/logic_map.py +345 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.12.data/data/examples/__init__.py +148 -0
- synkro-0.4.12.dist-info/METADATA +258 -0
- synkro-0.4.12.dist-info/RECORD +77 -0
- synkro-0.4.12.dist-info/WHEEL +4 -0
- synkro-0.4.12.dist-info/entry_points.txt +2 -0
- synkro-0.4.12.dist-info/licenses/LICENSE +21 -0
synkro/prompts/interactive_templates.py
@@ -0,0 +1,65 @@
+"""Prompt templates for interactive Logic Map editing."""
+
+LOGIC_MAP_REFINEMENT_PROMPT = """You are a Logic Map editor. Your task is to modify a Logic Map based on user feedback.
+
+CURRENT LOGIC MAP:
+{current_logic_map}
+
+ORIGINAL POLICY (for reference):
+{policy_text}
+
+USER FEEDBACK:
+{user_feedback}
+
+INSTRUCTIONS:
+Interpret the user's natural language request and modify the Logic Map accordingly.
+
+SUPPORTED OPERATIONS:
+
+1. **ADD**: Create a new rule
+   - User might say: "add a rule for...", "include a rule about...", "there should be a rule for..."
+   - Generate a new unique rule_id (use the next available number, e.g., if R008 exists, use R009)
+   - Extract condition, action, and dependencies from context
+   - Determine category based on rule type (CONSTRAINT, PERMISSION, PROCEDURE, EXCEPTION)
+
+2. **REMOVE**: Delete a rule
+   - User might say: "remove R005", "delete the rule about...", "R003 is not needed"
+   - Remove the specified rule
+   - Update dependencies in other rules that referenced the removed rule
+   - Update root_rules if the removed rule was a root
+
+3. **MERGE**: Combine two or more rules
+   - User might say: "merge R002 and R003", "combine these rules into one"
+   - Create a new rule that captures both conditions/actions
+   - Remove the original rules
+   - Update all dependencies that referenced the merged rules
+
+4. **MODIFY**: Change an existing rule
+   - User might say: "change R001 to...", "the condition for R002 should be...", "update R003's text"
+   - Update the specified fields (text, condition, action, category)
+   - Preserve rule_id and update dependencies if needed
+
+5. **SPLIT**: Divide a rule into multiple rules
+   - User might say: "split R001 into separate rules for X and Y"
+   - Create new rules with sequential IDs
+   - Remove original rule and update dependencies
+
+6. **REORDER DEPENDENCIES**: Change rule relationships
+   - User might say: "R003 should depend on R001", "remove dependency on R002 from R004"
+   - Update the dependencies arrays accordingly
+   - Ensure no circular dependencies are created
+
+CRITICAL REQUIREMENTS:
+- Maintain valid DAG structure (no circular dependencies)
+- Ensure all rule_ids are unique
+- Update root_rules list when dependencies change (root rules have no dependencies)
+- Preserve existing rules that aren't affected by the change
+- If the user's request is unclear, make a reasonable interpretation based on context
+
+OUTPUT:
+Return the complete updated Logic Map with ALL rules (both modified and unmodified).
+Provide a brief changes_summary explaining what was done.
+Provide reasoning explaining how you interpreted the user's feedback."""
+
+
+__all__ = ["LOGIC_MAP_REFINEMENT_PROMPT"]
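The three {placeholders} in this template suggest it is filled with a plain str.format call before being sent to the model. A minimal sketch of that usage, under that assumption (the actual call site, presumably synkro/interactive/logic_map_editor.py from the file list above, is not shown in this diff):

    # Hypothetical usage sketch -- not synkro's actual code.
    from synkro.prompts.interactive_templates import LOGIC_MAP_REFINEMENT_PROMPT

    prompt = LOGIC_MAP_REFINEMENT_PROMPT.format(
        current_logic_map='{"rules": [{"rule_id": "R001", ...}], "root_rules": ["R001"]}',
        policy_text="Refunds over $500 require manager approval.",
        user_feedback="Merge R002 and R003 into a single rule.",
    )

Because this template contains no literal braces, all three fields substitute cleanly; the structured output it requests (updated rules, changes_summary, reasoning) would then be parsed by the caller.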
synkro/prompts/multiturn_templates.py
@@ -0,0 +1,156 @@
+"""Multi-turn conversation prompt templates for dataset generation."""
+
+# =============================================================================
+# FOLLOW-UP QUESTION GENERATION
+# =============================================================================
+
+FOLLOW_UP_GENERATION_PROMPT = """You are generating a follow-up question for a multi-turn policy conversation.
+
+Generate a {question_type} follow-up question based on the conversation so far.
+
+QUESTION TYPES:
+- **clarification**: Ask for more details about an ambiguous point in the previous response
+- **edge_case**: Probe a boundary condition or unusual scenario related to the policy
+- **what_if**: Explore a hypothetical variation ("What if X changes?")
+- **specificity**: Drill into specific implementation details or examples
+- **challenge**: Question the reasoning or ask for justification of a recommendation
+
+CONVERSATION SO FAR:
+{conversation}
+
+POLICY:
+{policy}
+
+Generate a follow-up that:
+1. Builds naturally on the conversation context
+2. Tests deeper understanding of the policy
+3. Is realistic - something a user would actually ask
+4. Matches the specified question type
+5. Is specific enough to require a substantive response
+
+Respond with ONLY the follow-up question text."""
+
+# =============================================================================
+# MULTI-TURN RESPONSE GENERATION
+# =============================================================================
+
+MULTI_TURN_RESPONSE_PROMPT = """You are a domain expert continuing a multi-turn policy conversation.
+
+CONVERSATION HISTORY:
+{conversation}
+
+LATEST QUESTION:
+{question}
+
+POLICY:
+{policy}
+
+Provide a response that:
+1. Directly addresses the latest question
+2. Maintains consistency with your previous responses
+3. Cites specific policy sections that apply
+4. Builds on the established context
+5. Uses <reasoning> tags to show your thought process
+6. Gives specific, actionable recommendations
+
+Your response should acknowledge what was discussed before and add new insights.
+Keep the response appropriately concise for a conversational turn."""
+
+MULTI_TURN_INITIAL_PROMPT = """You are a domain expert starting a multi-turn conversation.
+
+This conversation will have {target_turns} turns. Start with a response that:
+1. Addresses the initial question thoroughly
+2. Uses <reasoning> tags to show your thought process
+3. Cites specific policy sections
+4. Leaves room for natural follow-up questions
+5. Gives specific, actionable initial guidance
+
+SCENARIO:
+{scenario}
+
+CONTEXT:
+{context}
+
+POLICY:
+{policy}
+
+Respond as the assistant. Your response should be comprehensive but leave room for the user to ask follow-up questions that will deepen the discussion."""
+
+# =============================================================================
+# MULTI-TURN GRADING
+# =============================================================================
+
+MULTI_TURN_GRADE_PROMPT = """You are a strict evaluator grading a multi-turn policy conversation.
+
+CONVERSATION:
+{conversation}
+
+POLICY:
+{policy}
+
+Evaluate EACH assistant turn AND the overall conversation.
+
+For EACH assistant turn, check:
+1. **Policy Compliant** - Recommendations follow the policy exactly
+2. **Properly Cited** - Relevant policy sections are referenced
+3. **Complete Reasoning** - Logic is sound with no gaps
+4. **Actionable** - Recommendations are specific, not vague
+
+For the OVERALL conversation, check:
+1. **Coherence** - No contradictions across turns
+2. **Progressive Depth** - Each turn appropriately builds on context
+3. **Consistency** - Recommendations don't conflict with earlier statements
+
+The conversation PASSES only if:
+- ALL individual turns pass their criteria
+- The overall coherence and consistency checks pass
+
+Respond with a structured evaluation for each turn and overall assessment."""
+
+TURN_GRADE_FORMAT = """{{
+  "turn_index": {turn_index},
+  "pass": <true/false>,
+  "policy_violations": ["<violation>", ...],
+  "missing_citations": ["<missing>", ...],
+  "incomplete_reasoning": ["<gap>", ...],
+  "vague_recommendations": ["<vague>", ...],
+  "feedback": "<specific feedback for this turn>"
+}}"""
+
+CONVERSATION_GRADE_FORMAT = """{{
+  "index": {index},
+  "overall_pass": <true/false>,
+  "turn_grades": [<array of turn grades>],
+  "coherence_pass": <true/false>,
+  "coherence_issues": ["<contradiction or incoherence>", ...],
+  "progressive_depth": <true/false>,
+  "overall_feedback": "<summary of what needs fixing across the conversation>"
+}}"""
+
+# =============================================================================
+# MULTI-TURN REFINEMENT
+# =============================================================================
+
+MULTI_TURN_REFINE_PROMPT = """You are improving a multi-turn conversation based on grader feedback.
+
+ORIGINAL CONVERSATION:
+{conversation}
+
+POLICY:
+{policy}
+
+GRADING FEEDBACK:
+{feedback}
+
+Fix ALL issues while maintaining conversation coherence:
+1. Address every policy violation in each turn
+2. Add missing citations where indicated
+3. Fill reasoning gaps with step-by-step logic
+4. Make vague recommendations specific and actionable
+5. Fix any coherence issues between turns
+6. Ensure progressive depth in the conversation
+
+IMPORTANT: Maintain the same conversation structure (same number of turns, same topics).
+Only improve the CONTENT of the assistant responses.
+
+Output the improved conversation with all turns."""
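Note the escaping convention in the two grade formats: doubled braces ({{ and }}) survive str.format as literal JSON braces, so only the named fields are substituted. A short sketch, assuming the templates are filled this way (the real wiring presumably lives in synkro/quality/multiturn_grader.py, which this excerpt omits):

    # Hypothetical sketch -- illustrates the brace escaping only.
    from synkro.prompts.multiturn_templates import (
        FOLLOW_UP_GENERATION_PROMPT,
        TURN_GRADE_FORMAT,
    )

    # {{ and }} become literal { and }; only {turn_index} is filled in.
    turn_schema = TURN_GRADE_FORMAT.format(turn_index=2)

    follow_up_prompt = FOLLOW_UP_GENERATION_PROMPT.format(
        question_type="edge_case",
        conversation="user: Can we share the data?\nassistant: Only with consent.",
        policy="Data may be shared only with documented consent.",
    )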
synkro/prompts/qa_templates.py
@@ -0,0 +1,97 @@
+"""QA-specific prompt templates for question-answer pair generation."""
+
+QA_SCENARIO_PROMPT = """You are an expert at creating factual questions from documents.
+
+Given a document, generate diverse questions that can be answered directly from the content.
+
+Types of questions to generate:
+1. **Factual** - Who, what, when, where questions with direct answers
+2. **Definitional** - "What is..." or "Define..." questions
+3. **Procedural** - "How do you..." or "What are the steps..."
+4. **Comparative** - Questions comparing concepts within the document
+5. **Inferential** - Questions requiring light reasoning from stated facts
+
+Make each question:
+- Answerable from the document (no external knowledge needed)
+- Specific and unambiguous
+- Varied in complexity and type
+- Natural - how a real person would ask
+
+Focus on creating questions that test comprehension of the document content."""
+
+QA_RESPONSE_PROMPT = """You are answering questions using ONLY information from the provided document.
+
+Rules:
+1. Answer ONLY using facts stated in the document
+2. Quote or paraphrase the relevant section
+3. If the answer isn't in the document, say "Not found in document"
+4. Keep answers concise but complete
+5. Include the source section/paragraph when possible
+
+Your response must be a JSON object:
+{{
+  "question": "<the question being answered>",
+  "answer": "<your answer using document facts>",
+  "context": "<the relevant passage from the document>"
+}}
+
+DOCUMENT:
+{policy}
+
+QUESTION:
+{scenario}
+
+Respond with ONLY the JSON object."""
+
+QA_GRADE_PROMPT = """You are grading a question-answer pair for quality.
+
+A QA pair PASSES only if ALL are true:
+1. **Factually Correct** - Answer is accurate based on the document
+2. **Properly Sourced** - Context contains the relevant passage
+3. **Complete** - Answer fully addresses the question
+4. **Concise** - No unnecessary information or padding
+5. **Grounded** - No information made up beyond the document
+
+DOCUMENT:
+{policy}
+
+QUESTION:
+{scenario}
+
+ANSWER TO GRADE:
+{response}
+
+Respond with ONLY a JSON object:
+{{
+  "pass": <true/false>,
+  "factual_errors": ["<error 1>", ...],
+  "missing_info": ["<missing 1>", ...],
+  "source_issues": ["<issue 1>", ...],
+  "feedback": "<summary of issues or 'Correct'>"
+}}"""
+
+QA_REFINE_PROMPT = """You are improving a question-answer pair based on feedback.
+
+Fix all issues while maintaining accuracy to the source document.
+
+DOCUMENT:
+{policy}
+
+QUESTION:
+{scenario}
+
+ORIGINAL ANSWER:
+{response}
+
+ISSUES TO FIX:
+{feedback}
+
+Generate an IMPROVED answer. Output a JSON object:
+{{
+  "question": "<the question>",
+  "answer": "<your IMPROVED answer>",
+  "context": "<the relevant passage from the document>"
+}}
+
+Respond with ONLY the JSON object."""
+
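QA_RESPONSE_PROMPT pins the model to a strict JSON object, so a caller can json.loads the reply directly. A sketch of that round trip with a stubbed client (fake_llm is invented here; the real client is synkro/llm/client.py, not shown in this diff):

    # Hypothetical round trip -- fake_llm stands in for the real LLM client.
    import json

    from synkro.prompts.qa_templates import QA_RESPONSE_PROMPT

    def fake_llm(prompt: str) -> str:
        """Canned reply shaped like the JSON object the template demands."""
        return json.dumps({
            "question": "How long do customers have to return an item?",
            "answer": "Within 30 days of purchase (Section 3).",
            "context": "Section 3: Returns are accepted within 30 days of purchase.",
        })

    prompt = QA_RESPONSE_PROMPT.format(
        policy="Section 3: Returns are accepted within 30 days of purchase.",
        scenario="How long do customers have to return an item?",
    )
    pair = json.loads(fake_llm(prompt))  # keys: question, answer, context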
synkro/prompts/templates.py
@@ -0,0 +1,281 @@
+"""Universal prompt templates for dataset generation across ANY domain."""
+
+# =============================================================================
+# POLICY ANALYSIS PROMPTS
+# =============================================================================
+
+POLICY_COMPLEXITY_PROMPT = """You are an expert at analyzing policy documents to determine their complexity.
+
+Analyze the given policy and determine the optimal number of conversation turns needed to properly test understanding.
+
+Guidelines:
+- **Simple (1-2 turns)**: Policy has 1 clear variable/rule. Single query → Straight answer.
+  Example: "All data must be encrypted" - just one rule to check.
+
+- **Conditional (3 turns)**: Policy has 2-3 variables/conditions. Query → Clarification → Verdict.
+  Example: "Data can be shared IF consent is given AND purpose is specified" - needs clarification.
+
+- **Complex (5+ turns)**: Policy has 4+ nested variables, exceptions, or conditions.
+  Multiple rounds of validation before final sign-off.
+  Example: "Data retention varies by type, region, consent status, and business need" - needs deep exploration.
+
+Count the following as "variables":
+- Distinct rules or requirements
+- Conditional branches (if/then/else)
+- Exceptions to rules
+- Categories or types that affect decisions
+- Time-based conditions
+- Role-based permissions
+
+Respond with your analysis."""
+
+POLICY_PLANNING_PROMPT = """You are an expert at creating training data plans for AI models across ANY domain.
+
+Given a task description, policy, or domain specification and a target number of traces, analyze the content and create an optimal plan for generating training data.
+
+Your task:
+1. Deeply analyze the domain/task to understand its core concepts, rules, processes, and challenges
+2. Identify distinct SCENARIO CATEGORIES that test different aspects of the domain
+3. Distribute the target traces across categories based on complexity and importance
+4. Ensure coverage of: clear violations/errors, edge cases, happy paths, real-world constraints, and domain-specific challenges
+
+Guidelines for dynamic category creation:
+- **Analyze the domain deeply**: Understand the core rules, processes, stakeholders, and common challenges
+- **Create domain-specific categories**: Base categories on the actual content, not generic assumptions
+- **Balance complexity**: Allocate based on domain complexity (simple domains: 60% happy paths, complex domains: 40% edge cases)
+- **Ensure comprehensive coverage**: Every major aspect of the domain should be tested
+- **Consider domain-specific challenges**: Time pressure in trading, regulatory changes in finance, technical failures in engineering, etc.
+
+For each category, provide:
+- name: Short descriptive name specific to the domain
+- description: What this category tests, including specific domain concepts and challenges
+- traces: Number of traces to generate (must sum to target)
+
+Provide detailed reasoning explaining:
+1. Your analysis of the domain's core concepts and challenges
+2. Why you chose these specific categories for this domain
+3. How the category distribution reflects the domain's complexity and real-world usage patterns"""
+
+# =============================================================================
+# SCENARIO GENERATION PROMPTS
+# =============================================================================
+
+SCENARIO_GENERATOR_PROMPT = """You are an expert at creating realistic scenarios for ANY domain or task.
+
+Given a task description, policy, or domain specification, first deeply analyze the domain to understand:
+- Core concepts, rules, and processes
+- Key stakeholders and their roles
+- Common challenges and failure modes
+- Domain-specific terminology and workflows
+
+Then generate diverse scenarios that thoroughly test understanding of the domain:
+
+1. **Clear Success/Failure Cases** - Obvious correct/incorrect applications of domain rules
+2. **Edge Cases** - Ambiguous situations with multiple valid interpretations
+3. **Multi-Step Processes** - Complex scenarios requiring sequential reasoning
+4. **Real-World Constraints** - Practical limitations like time pressure, incomplete info, resource constraints
+5. **Domain-Specific Challenges** - Scenarios that test unique aspects of this particular domain
+6. **Stakeholder Interactions** - Situations involving coordination between different parties
+7. **Exception Handling** - Scenarios requiring deviation from standard processes
+
+Make each scenario:
+- Deeply grounded in the specific domain's concepts and terminology
+- Realistic and challenging for someone working in that domain
+- Specific with concrete details that reflect actual domain practices
+- Varied in complexity and stakeholder perspectives
+- Designed to reveal both expert and novice understanding gaps
+
+Focus on creating "golden traces" - perfect examples that demonstrate deep domain mastery."""
+
+CATEGORY_SCENARIO_PROMPT = """You are an expert at creating realistic scenarios for ANY domain or task.
+
+Generate scenarios specifically for the following CATEGORY within the given domain:
+**Category Name**: {CATEGORY_NAME}
+**Category Description**: {CATEGORY_DESCRIPTION}
+
+First, deeply understand:
+- How this category fits into the broader domain
+- What specific skills or knowledge this category tests
+- The real-world contexts where this category applies
+- Common mistakes or misconceptions in this category
+
+All generated scenarios MUST:
+- Perfectly fit this specific category's focus and objectives
+- Demonstrate deep understanding of the category's role in the domain
+- Test the exact skills and knowledge described in the category
+- Be realistic and occur in actual domain practice
+
+Make each scenario:
+- Highly specific with concrete details that reflect domain expertise
+- Challenging and nuanced - not simplistic examples
+- Varied in stakeholder perspectives, contexts, and complexity levels
+- Different from each other (no duplicates) - explore different facets of the category
+- Include domain-specific terminology, processes, and challenges
+- Designed as "golden traces" that showcase expert-level understanding
+
+Focus on creating scenarios that would distinguish between novice and expert performance in this category."""
+
+# =============================================================================
+# SYSTEM PROMPT
+# =============================================================================
+
+SYSTEM_PROMPT = """You are a domain expert. When given a scenario and context, provide comprehensive, expert-level guidance.
+
+IMPORTANT: Always show your reasoning process using <reasoning> tags before giving your answer.
+
+Your responses must:
+- Start with <reasoning> tags showing step-by-step analysis
+- Cite specific domain concepts, rules, or processes that apply
+- Give specific, actionable recommendations grounded in domain best practices
+- Address all aspects of the scenario from multiple stakeholder perspectives
+- Acknowledge edge cases, exceptions, and potential complications
+- Consider contemporary challenges and modern practices in the domain
+
+Vary your response style while maintaining expertise:
+- For concise responses: Direct, focused guidance with key domain principles
+- For detailed responses: Comprehensive analysis with structured breakdowns and examples
+- For practical responses: Step-by-step implementation guides and checklists
+- For complex responses: Thorough exploration of trade-offs and alternative approaches
+
+Always prioritize accuracy, clarity, and deep domain understanding in your guidance."""
+
+# =============================================================================
+# BATCHED PROMPTS (for batch generation)
+# =============================================================================
+
+BATCHED_RESPONSE_PROMPT = """You are generating training data for a domain expert model.
+
+For EACH scenario below, create a complete training example in CHAT MESSAGES FORMAT.
+
+Each training example must have exactly 3 messages:
+1. "system" - The system prompt defining the assistant's role
+2. "user" - The scenario and context as the user's question
+3. "assistant" - Your expert guidance response
+
+The assistant response must:
+- Cite specific policy sections that apply
+- Explain reasoning step-by-step
+- Give specific, actionable recommendations
+- Address all aspects of the scenario
+- Acknowledge edge cases and complications
+
+Respond with a JSON array where each object has:
+- "index": the scenario number (0-based)
+- "messages": array of 3 message objects with "role" and "content" fields"""
+
+BATCHED_GRADER_PROMPT = """You are a strict policy compliance evaluator. Your job is to determine if EACH response is FULLY CORRECT.
+
+A response PASSES only if ALL of the following are true:
+1. **Policy Compliant** - Every recommendation follows the policy exactly. No violations.
+2. **Fully Supported** - Every claim is backed by a specific policy section. Nothing made up.
+3. **Properly Cited** - All relevant policy sections are explicitly referenced.
+4. **Complete Reasoning** - The chain of thought is complete with no gaps or skipped steps.
+5. **Actionable & Specific** - All recommendations are concrete and implementable, not vague.
+
+If ANY of these fail, the response does NOT pass. Be strict - only mark "pass": true for perfect responses.
+
+For each response, provide structured feedback:
+- "policy_violations": List any rules misinterpreted or violated
+- "missing_citations": List policy sections that should have been cited
+- "incomplete_reasoning": List logical gaps or missing reasoning steps
+- "vague_recommendations": List recommendations that need to be more specific
+- "feedback": Summary of what needs to be fixed
+
+Respond with a JSON array where each object has:
+- "index": the scenario number (0-based)
+- "pass": boolean (true ONLY if response is fully correct)
+- "policy_violations": array of violations
+- "missing_citations": array of missing citations
+- "incomplete_reasoning": array of reasoning gaps
+- "vague_recommendations": array of vague items
+- "feedback": summary of how to fix"""
+
+BATCHED_REFINER_PROMPT = """You are improving training data for a domain expert model based on grader feedback.
+
+For EACH scenario with feedback below, fix ALL issues while keeping what was correct.
+
+You will receive structured feedback with:
+- policy_violations: Rules you violated or misinterpreted - FIX THESE
+- missing_citations: Policy sections you should cite - ADD THESE
+- incomplete_reasoning: Gaps in your logic - FILL THESE IN
+- vague_recommendations: Things that need to be more specific - MAKE CONCRETE
+
+Requirements:
+1. Fix every policy violation - ensure recommendations follow the policy exactly
+2. Add citations for every missing policy section mentioned
+3. Complete any incomplete reasoning chains with step-by-step logic
+4. Replace vague language with specific, actionable recommendations
+5. Keep the parts that were already correct
+
+Output in CHAT MESSAGES FORMAT with exactly 3 messages:
+1. "system" - The system prompt defining the assistant's role
+2. "user" - The scenario and context as the user's question
+3. "assistant" - Your IMPROVED guidance
+
+Respond with a JSON array where each object has:
+- "index": the scenario number (0-based)
+- "messages": array of 3 message objects with "role" and "content" fields"""
+
+# =============================================================================
+# SINGLE PROMPTS (for parallel high-concurrency generation)
+# =============================================================================
+
+SINGLE_RESPONSE_PROMPT = """You are a domain expert generating a training example.
+
+Given the scenario and policy below, create a complete training example.
+
+Your response must be a JSON object with exactly 3 messages:
+{{
+  "messages": [
+    {{"role": "system", "content": "<system prompt defining expert role>"}},
+    {{"role": "user", "content": "<the scenario as a user question>"}},
+    {{"role": "assistant", "content": "<your expert response>"}}
+  ]
+}}
+
+The assistant response must:
+- Start with <reasoning> tags showing your thought process
+- Cite specific policy sections that apply
+- Give specific, actionable recommendations
+- Address all aspects of the scenario
+- Acknowledge edge cases and complications
+
+SCENARIO:
+{scenario}
+
+CONTEXT:
+{context}
+
+POLICY:
+{policy}
+
+Respond with ONLY the JSON object, no additional text."""
+
+SINGLE_GRADE_PROMPT = """You are a strict evaluator. Grade this response.
+
+A response PASSES only if ALL are true:
+1. Policy Compliant - Every recommendation follows the policy exactly
+2. Fully Supported - Every claim backed by specific policy section
+3. Properly Cited - All relevant policy sections referenced
+4. Complete Reasoning - Chain of thought has no gaps
+5. Actionable & Specific - Recommendations are concrete, not vague
+
+SCENARIO:
+{scenario}
+
+POLICY:
+{policy}
+
+RESPONSE TO GRADE:
+{response}
+
+Respond with ONLY a JSON object:
+{{
+  "pass": <true/false>,
+  "policy_violations": ["<violation 1>", ...],
+  "missing_citations": ["<missing 1>", ...],
+  "incomplete_reasoning": ["<gap 1>", ...],
+  "vague_recommendations": ["<vague 1>", ...],
+  "feedback": "<summary of issues or 'Correct'>"
+}}"""
+
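Taken together, the single-prompt templates imply a generate-then-grade loop: fill SINGLE_RESPONSE_PROMPT, parse the 3-message JSON, then grade the assistant message with SINGLE_GRADE_PROMPT (and, on failure, refine). A sketch under that assumption (generate_and_grade and the llm callable are illustrative, not the synkro API; the real loop presumably lives in synkro/pipeline/runner.py, not shown here):

    # Hypothetical single-trace loop -- illustrative only.
    import json
    from typing import Callable

    from synkro.prompts.templates import SINGLE_GRADE_PROMPT, SINGLE_RESPONSE_PROMPT

    def generate_and_grade(llm: Callable[[str], str],
                           scenario: str, context: str, policy: str) -> dict:
        """Generate one 3-message training example, then grade it strictly."""
        example = json.loads(llm(SINGLE_RESPONSE_PROMPT.format(
            scenario=scenario, context=context, policy=policy)))
        assistant_reply = example["messages"][2]["content"]
        grade = json.loads(llm(SINGLE_GRADE_PROMPT.format(
            scenario=scenario, policy=policy, response=assistant_reply)))
        return {"example": example, "grade": grade}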