synkro-0.4.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synkro might be problematic.
- synkro/__init__.py +165 -0
- synkro/cli.py +120 -0
- synkro/core/__init__.py +7 -0
- synkro/core/dataset.py +233 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +160 -0
- synkro/formatters/__init__.py +12 -0
- synkro/formatters/qa.py +85 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/generator.py +163 -0
- synkro/generation/planner.py +87 -0
- synkro/generation/responses.py +160 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +370 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +235 -0
- synkro/llm/rate_limits.py +95 -0
- synkro/models/__init__.py +43 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +15 -0
- synkro/modes/config.py +66 -0
- synkro/modes/qa.py +18 -0
- synkro/modes/sft.py +18 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +237 -0
- synkro/pipeline/runner.py +198 -0
- synkro/pipelines.py +105 -0
- synkro/prompts/__init__.py +44 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/qa_templates.py +97 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +201 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/grader.py +130 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/reporting.py +213 -0
- synkro/schemas.py +325 -0
- synkro/types/__init__.py +41 -0
- synkro/types/core.py +113 -0
- synkro/types/dataset_type.py +30 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.5.data/data/examples/__init__.py +148 -0
- synkro-0.4.5.dist-info/METADATA +221 -0
- synkro-0.4.5.dist-info/RECORD +58 -0
- synkro-0.4.5.dist-info/WHEEL +4 -0
- synkro-0.4.5.dist-info/entry_points.txt +2 -0
- synkro-0.4.5.dist-info/licenses/LICENSE +21 -0
synkro/prompts/base.py
ADDED
@@ -0,0 +1,167 @@
"""Customizable prompt classes for building your own generation pipelines."""

from pydantic import BaseModel, Field
from synkro.prompts.templates import (
    SYSTEM_PROMPT,
    SCENARIO_GENERATOR_PROMPT,
    BATCHED_RESPONSE_PROMPT,
    BATCHED_GRADER_PROMPT,
    BATCHED_REFINER_PROMPT,
    POLICY_PLANNING_PROMPT,
)


class SystemPrompt(BaseModel):
    """The system prompt that defines the expert's role and behavior."""

    template: str = Field(default=SYSTEM_PROMPT)

    def render(self, **kwargs) -> str:
        """Render the prompt with any custom variables."""
        return self.template.format(**kwargs) if kwargs else self.template


class ScenarioPrompt(BaseModel):
    """Prompt for generating scenarios from policy documents."""

    template: str = Field(default=SCENARIO_GENERATOR_PROMPT)

    def render(self, policy: str, count: int, category: str | None = None) -> str:
        """
        Render the scenario generation prompt.

        Args:
            policy: The policy text
            count: Number of scenarios to generate
            category: Optional category to focus scenarios on
        """
        prompt = f"{self.template}\n\nPOLICY:\n{policy}\n\nGenerate exactly {count} scenarios."
        if category:
            prompt += f"\n\nFocus on scenarios related to: {category}"
        return prompt


class ResponsePrompt(BaseModel):
    """Prompt for generating responses to scenarios."""

    template: str = Field(default=BATCHED_RESPONSE_PROMPT)
    system_prompt: str = Field(default=SYSTEM_PROMPT)

    def render(self, scenarios: list[dict], policy: str) -> str:
        """
        Render the response generation prompt.

        Args:
            scenarios: List of scenario dicts with 'description' and 'context'
            policy: The policy text for grounding responses
        """
        scenarios_text = "\n\n".join(
            f"SCENARIO {i}:\n{s['description']}\n\nCONTEXT:\n{s['context']}"
            for i, s in enumerate(scenarios)
        )

        return f"""{self.template}

SYSTEM PROMPT TO USE:
{self.system_prompt}

POLICY:
{policy}

SCENARIOS:
{scenarios_text}"""


class GradePrompt(BaseModel):
    """Prompt for grading response quality."""

    template: str = Field(default=BATCHED_GRADER_PROMPT)

    def render(self, responses: list[dict], policy: str) -> str:
        """
        Render the grading prompt.

        Args:
            responses: List of response dicts with messages
            policy: The policy text to grade against
        """
        responses_text = "\n\n".join(
            f"RESPONSE {i}:\n{r.get('assistant_message', r.get('messages', [{}])[-1].get('content', ''))}"
            for i, r in enumerate(responses)
        )

        return f"""{self.template}

POLICY:
{policy}

RESPONSES TO GRADE:
{responses_text}"""


class RefinePrompt(BaseModel):
    """Prompt for refining failed responses."""

    template: str = Field(default=BATCHED_REFINER_PROMPT)
    system_prompt: str = Field(default=SYSTEM_PROMPT)

    def render(self, failed_items: list[dict], policy: str) -> str:
        """
        Render the refinement prompt.

        Args:
            failed_items: List of dicts with 'scenario', 'response', and 'feedback'
            policy: The policy text
        """
        items_text = "\n\n".join(
            f"""SCENARIO {i}:
{item['scenario']}

ORIGINAL RESPONSE:
{item['response']}

GRADER FEEDBACK:
- Policy Violations: {item.get('policy_violations', [])}
- Missing Citations: {item.get('missing_citations', [])}
- Incomplete Reasoning: {item.get('incomplete_reasoning', [])}
- Vague Recommendations: {item.get('vague_recommendations', [])}
- Summary: {item.get('feedback', '')}"""
            for i, item in enumerate(failed_items)
        )

        return f"""{self.template}

SYSTEM PROMPT TO USE:
{self.system_prompt}

POLICY:
{policy}

ITEMS TO REFINE:
{items_text}"""


class PlanPrompt(BaseModel):
    """Prompt for planning generation categories."""

    template: str = Field(default=POLICY_PLANNING_PROMPT)

    def render(self, policy: str, target_traces: int) -> str:
        """
        Render the planning prompt.

        Args:
            policy: The policy text to analyze
            target_traces: Target number of traces to generate
        """
        return f"""{self.template}

POLICY/DOMAIN SPECIFICATION:
{policy}

TARGET TRACES: {target_traces}

Respond with a JSON object containing:
- "categories": array of category objects with "name", "description", and "traces"
- "reasoning": explanation of your analysis and category choices"""
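For orientation, a minimal usage sketch of the prompt classes defined above. The policy text, trace count, and category name are invented for illustration; only the imports from synkro.prompts.base come from this release.

# Illustrative sketch, not code shipped in the wheel.
from synkro.prompts.base import PlanPrompt, ScenarioPrompt

policy = "Data can be shared only if consent is given and the purpose is specified."  # hypothetical policy

# Plan categories for a hypothetical target of 20 traces.
plan_text = PlanPrompt().render(policy=policy, target_traces=20)

# Generate 5 scenarios, optionally focused on one category.
scenario_text = ScenarioPrompt().render(policy=policy, count=5, category="consent edge cases")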
synkro/prompts/qa_templates.py
ADDED
@@ -0,0 +1,97 @@
"""QA-specific prompt templates for question-answer pair generation."""

QA_SCENARIO_PROMPT = """You are an expert at creating factual questions from documents.

Given a document, generate diverse questions that can be answered directly from the content.

Types of questions to generate:
1. **Factual** - Who, what, when, where questions with direct answers
2. **Definitional** - "What is..." or "Define..." questions
3. **Procedural** - "How do you..." or "What are the steps..."
4. **Comparative** - Questions comparing concepts within the document
5. **Inferential** - Questions requiring light reasoning from stated facts

Make each question:
- Answerable from the document (no external knowledge needed)
- Specific and unambiguous
- Varied in complexity and type
- Natural - how a real person would ask

Focus on creating questions that test comprehension of the document content."""

QA_RESPONSE_PROMPT = """You are answering questions using ONLY information from the provided document.

Rules:
1. Answer ONLY using facts stated in the document
2. Quote or paraphrase the relevant section
3. If the answer isn't in the document, say "Not found in document"
4. Keep answers concise but complete
5. Include the source section/paragraph when possible

Your response must be a JSON object:
{{
"question": "<the question being answered>",
"answer": "<your answer using document facts>",
"context": "<the relevant passage from the document>"
}}

DOCUMENT:
{policy}

QUESTION:
{scenario}

Respond with ONLY the JSON object."""

QA_GRADE_PROMPT = """You are grading a question-answer pair for quality.

A QA pair PASSES only if ALL are true:
1. **Factually Correct** - Answer is accurate based on the document
2. **Properly Sourced** - Context contains the relevant passage
3. **Complete** - Answer fully addresses the question
4. **Concise** - No unnecessary information or padding
5. **Grounded** - No information made up beyond the document

DOCUMENT:
{policy}

QUESTION:
{scenario}

ANSWER TO GRADE:
{response}

Respond with ONLY a JSON object:
{{
"pass": <true/false>,
"factual_errors": ["<error 1>", ...],
"missing_info": ["<missing 1>", ...],
"source_issues": ["<issue 1>", ...],
"feedback": "<summary of issues or 'Correct'>"
}}"""

QA_REFINE_PROMPT = """You are improving a question-answer pair based on feedback.

Fix all issues while maintaining accuracy to the source document.

DOCUMENT:
{policy}

QUESTION:
{scenario}

ORIGINAL ANSWER:
{response}

ISSUES TO FIX:
{feedback}

Generate an IMPROVED answer. Output a JSON object:
{{
"question": "<the question>",
"answer": "<your IMPROVED answer>",
"context": "<the relevant passage from the document>"
}}

Respond with ONLY the JSON object."""
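A short sketch of how these QA templates appear meant to be filled and parsed. The document text, question, and simulated model reply below are assumptions made for illustration; only the template names come from this release.

import json

from synkro.prompts.qa_templates import QA_GRADE_PROMPT, QA_RESPONSE_PROMPT

document = "Refunds are issued within 30 days of purchase."   # hypothetical document
question = "How long after purchase can a refund be issued?"  # hypothetical question

# The doubled braces in the templates survive str.format(), leaving a literal JSON skeleton.
answer_prompt = QA_RESPONSE_PROMPT.format(policy=document, scenario=question)

# A model reply would then be parsed as JSON; a canned reply stands in for an LLM call here.
reply = '{"question": "...", "answer": "Within 30 days of purchase.", "context": "Refunds are issued within 30 days of purchase."}'
qa_pair = json.loads(reply)

grade_prompt = QA_GRADE_PROMPT.format(policy=document, scenario=question, response=qa_pair["answer"])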
synkro/prompts/templates.py
ADDED
@@ -0,0 +1,281 @@
"""Universal prompt templates for dataset generation across ANY domain."""

# =============================================================================
# POLICY ANALYSIS PROMPTS
# =============================================================================

POLICY_COMPLEXITY_PROMPT = """You are an expert at analyzing policy documents to determine their complexity.

Analyze the given policy and determine the optimal number of conversation turns needed to properly test understanding.

Guidelines:
- **Simple (1-2 turns)**: Policy has 1 clear variable/rule. Single query → Straight answer.
  Example: "All data must be encrypted" - just one rule to check.

- **Conditional (3 turns)**: Policy has 2-3 variables/conditions. Query → Clarification → Verdict.
  Example: "Data can be shared IF consent is given AND purpose is specified" - needs clarification.

- **Complex (5+ turns)**: Policy has 4+ nested variables, exceptions, or conditions.
  Multiple rounds of validation before final sign-off.
  Example: "Data retention varies by type, region, consent status, and business need" - needs deep exploration.

Count the following as "variables":
- Distinct rules or requirements
- Conditional branches (if/then/else)
- Exceptions to rules
- Categories or types that affect decisions
- Time-based conditions
- Role-based permissions

Respond with your analysis."""

POLICY_PLANNING_PROMPT = """You are an expert at creating training data plans for AI models across ANY domain.

Given a task description, policy, or domain specification and a target number of traces, analyze the content and create an optimal plan for generating training data.

Your task:
1. Deeply analyze the domain/task to understand its core concepts, rules, processes, and challenges
2. Identify distinct SCENARIO CATEGORIES that test different aspects of the domain
3. Distribute the target traces across categories based on complexity and importance
4. Ensure coverage of: clear violations/errors, edge cases, happy paths, real-world constraints, and domain-specific challenges

Guidelines for dynamic category creation:
- **Analyze the domain deeply**: Understand the core rules, processes, stakeholders, and common challenges
- **Create domain-specific categories**: Base categories on the actual content, not generic assumptions
- **Balance complexity**: Allocate based on domain complexity (simple domains: 60% happy paths, complex domains: 40% edge cases)
- **Ensure comprehensive coverage**: Every major aspect of the domain should be tested
- **Consider domain-specific challenges**: Time pressure in trading, regulatory changes in finance, technical failures in engineering, etc.

For each category, provide:
- name: Short descriptive name specific to the domain
- description: What this category tests, including specific domain concepts and challenges
- traces: Number of traces to generate (must sum to target)

Provide detailed reasoning explaining:
1. Your analysis of the domain's core concepts and challenges
2. Why you chose these specific categories for this domain
3. How the category distribution reflects the domain's complexity and real-world usage patterns"""

# =============================================================================
# SCENARIO GENERATION PROMPTS
# =============================================================================

SCENARIO_GENERATOR_PROMPT = """You are an expert at creating realistic scenarios for ANY domain or task.

Given a task description, policy, or domain specification, first deeply analyze the domain to understand:
- Core concepts, rules, and processes
- Key stakeholders and their roles
- Common challenges and failure modes
- Domain-specific terminology and workflows

Then generate diverse scenarios that thoroughly test understanding of the domain:

1. **Clear Success/Failure Cases** - Obvious correct/incorrect applications of domain rules
2. **Edge Cases** - Ambiguous situations with multiple valid interpretations
3. **Multi-Step Processes** - Complex scenarios requiring sequential reasoning
4. **Real-World Constraints** - Practical limitations like time pressure, incomplete info, resource constraints
5. **Domain-Specific Challenges** - Scenarios that test unique aspects of this particular domain
6. **Stakeholder Interactions** - Situations involving coordination between different parties
7. **Exception Handling** - Scenarios requiring deviation from standard processes

Make each scenario:
- Deeply grounded in the specific domain's concepts and terminology
- Realistic and challenging for someone working in that domain
- Specific with concrete details that reflect actual domain practices
- Varied in complexity and stakeholder perspectives
- Designed to reveal both expert and novice understanding gaps

Focus on creating "golden traces" - perfect examples that demonstrate deep domain mastery."""

CATEGORY_SCENARIO_PROMPT = """You are an expert at creating realistic scenarios for ANY domain or task.

Generate scenarios specifically for the following CATEGORY within the given domain:
**Category Name**: {CATEGORY_NAME}
**Category Description**: {CATEGORY_DESCRIPTION}

First, deeply understand:
- How this category fits into the broader domain
- What specific skills or knowledge this category tests
- The real-world contexts where this category applies
- Common mistakes or misconceptions in this category

All generated scenarios MUST:
- Perfectly fit this specific category's focus and objectives
- Demonstrate deep understanding of the category's role in the domain
- Test the exact skills and knowledge described in the category
- Be realistic and occur in actual domain practice

Make each scenario:
- Highly specific with concrete details that reflect domain expertise
- Challenging and nuanced - not simplistic examples
- Varied in stakeholder perspectives, contexts, and complexity levels
- Different from each other (no duplicates) - explore different facets of the category
- Include domain-specific terminology, processes, and challenges
- Designed as "golden traces" that showcase expert-level understanding

Focus on creating scenarios that would distinguish between novice and expert performance in this category."""

# =============================================================================
# SYSTEM PROMPT
# =============================================================================

SYSTEM_PROMPT = """You are a domain expert. When given a scenario and context, provide comprehensive, expert-level guidance.

IMPORTANT: Always show your reasoning process using <reasoning> tags before giving your answer.

Your responses must:
- Start with <reasoning> tags showing step-by-step analysis
- Cite specific domain concepts, rules, or processes that apply
- Give specific, actionable recommendations grounded in domain best practices
- Address all aspects of the scenario from multiple stakeholder perspectives
- Acknowledge edge cases, exceptions, and potential complications
- Consider contemporary challenges and modern practices in the domain

Vary your response style while maintaining expertise:
- For concise responses: Direct, focused guidance with key domain principles
- For detailed responses: Comprehensive analysis with structured breakdowns and examples
- For practical responses: Step-by-step implementation guides and checklists
- For complex responses: Thorough exploration of trade-offs and alternative approaches

Always prioritize accuracy, clarity, and deep domain understanding in your guidance."""

# =============================================================================
# BATCHED PROMPTS (for batch generation)
# =============================================================================

BATCHED_RESPONSE_PROMPT = """You are generating training data for a domain expert model.

For EACH scenario below, create a complete training example in CHAT MESSAGES FORMAT.

Each training example must have exactly 3 messages:
1. "system" - The system prompt defining the assistant's role
2. "user" - The scenario and context as the user's question
3. "assistant" - Your expert guidance response

The assistant response must:
- Cite specific policy sections that apply
- Explain reasoning step-by-step
- Give specific, actionable recommendations
- Address all aspects of the scenario
- Acknowledge edge cases and complications

Respond with a JSON array where each object has:
- "index": the scenario number (0-based)
- "messages": array of 3 message objects with "role" and "content" fields"""

BATCHED_GRADER_PROMPT = """You are a strict policy compliance evaluator. Your job is to determine if EACH response is FULLY CORRECT.

A response PASSES only if ALL of the following are true:
1. **Policy Compliant** - Every recommendation follows the policy exactly. No violations.
2. **Fully Supported** - Every claim is backed by a specific policy section. Nothing made up.
3. **Properly Cited** - All relevant policy sections are explicitly referenced.
4. **Complete Reasoning** - The chain of thought is complete with no gaps or skipped steps.
5. **Actionable & Specific** - All recommendations are concrete and implementable, not vague.

If ANY of these fail, the response does NOT pass. Be strict - only mark "pass": true for perfect responses.

For each response, provide structured feedback:
- "policy_violations": List any rules misinterpreted or violated
- "missing_citations": List policy sections that should have been cited
- "incomplete_reasoning": List logical gaps or missing reasoning steps
- "vague_recommendations": List recommendations that need to be more specific
- "feedback": Summary of what needs to be fixed

Respond with a JSON array where each object has:
- "index": the scenario number (0-based)
- "pass": boolean (true ONLY if response is fully correct)
- "policy_violations": array of violations
- "missing_citations": array of missing citations
- "incomplete_reasoning": array of reasoning gaps
- "vague_recommendations": array of vague items
- "feedback": summary of how to fix"""

BATCHED_REFINER_PROMPT = """You are improving training data for a domain expert model based on grader feedback.

For EACH scenario with feedback below, fix ALL issues while keeping what was correct.

You will receive structured feedback with:
- policy_violations: Rules you violated or misinterpreted - FIX THESE
- missing_citations: Policy sections you should cite - ADD THESE
- incomplete_reasoning: Gaps in your logic - FILL THESE IN
- vague_recommendations: Things that need to be more specific - MAKE CONCRETE

Requirements:
1. Fix every policy violation - ensure recommendations follow the policy exactly
2. Add citations for every missing policy section mentioned
3. Complete any incomplete reasoning chains with step-by-step logic
4. Replace vague language with specific, actionable recommendations
5. Keep the parts that were already correct

Output in CHAT MESSAGES FORMAT with exactly 3 messages:
1. "system" - The system prompt defining the assistant's role
2. "user" - The scenario and context as the user's question
3. "assistant" - Your IMPROVED guidance

Respond with a JSON array where each object has:
- "index": the scenario number (0-based)
- "messages": array of 3 message objects with "role" and "content" fields"""

# =============================================================================
# SINGLE PROMPTS (for parallel high-concurrency generation)
# =============================================================================

SINGLE_RESPONSE_PROMPT = """You are a domain expert generating a training example.

Given the scenario and policy below, create a complete training example.

Your response must be a JSON object with exactly 3 messages:
{{
"messages": [
{{"role": "system", "content": "<system prompt defining expert role>"}},
{{"role": "user", "content": "<the scenario as a user question>"}},
{{"role": "assistant", "content": "<your expert response>"}}
]
}}

The assistant response must:
- Start with <reasoning> tags showing your thought process
- Cite specific policy sections that apply
- Give specific, actionable recommendations
- Address all aspects of the scenario
- Acknowledge edge cases and complications

SCENARIO:
{scenario}

CONTEXT:
{context}

POLICY:
{policy}

Respond with ONLY the JSON object, no additional text."""

SINGLE_GRADE_PROMPT = """You are a strict evaluator. Grade this response.

A response PASSES only if ALL are true:
1. Policy Compliant - Every recommendation follows the policy exactly
2. Fully Supported - Every claim backed by specific policy section
3. Properly Cited - All relevant policy sections referenced
4. Complete Reasoning - Chain of thought has no gaps
5. Actionable & Specific - Recommendations are concrete, not vague

SCENARIO:
{scenario}

POLICY:
{policy}

RESPONSE TO GRADE:
{response}

Respond with ONLY a JSON object:
{{
"pass": <true/false>,
"policy_violations": ["<violation 1>", ...],
"missing_citations": ["<missing 1>", ...],
"incomplete_reasoning": ["<gap 1>", ...],
"vague_recommendations": ["<vague 1>", ...],
"feedback": "<summary of issues or 'Correct'>"
}}"""
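To close the loop, a hedged sketch of filling the single-trace templates above and parsing the JSON they request. The policy, scenario, context, and canned reply are invented for illustration; only the template names come from this release.

import json

from synkro.prompts.templates import SINGLE_GRADE_PROMPT, SINGLE_RESPONSE_PROMPT

policy = "All customer data exports require manager approval."          # hypothetical policy
scenario = "An analyst asks to export customer emails for a campaign."  # hypothetical scenario
context = "The analyst has not yet requested approval."                 # hypothetical context

prompt = SINGLE_RESPONSE_PROMPT.format(scenario=scenario, context=context, policy=policy)

# The template asks for a JSON object holding a 3-message chat; a canned reply stands in for an LLM call.
reply = json.dumps({
    "messages": [
        {"role": "system", "content": "You are a domain expert."},
        {"role": "user", "content": scenario},
        {"role": "assistant", "content": "<reasoning>Exports need manager approval.</reasoning> Request approval first."},
    ]
})
trace = json.loads(reply)["messages"]

grade_prompt = SINGLE_GRADE_PROMPT.format(scenario=scenario, policy=policy, response=trace[-1]["content"])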