synkro-0.4.36-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synkro might be problematic. Click here for more details.
- synkro/__init__.py +331 -0
- synkro/advanced.py +184 -0
- synkro/cli.py +156 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +432 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +291 -0
- synkro/formatters/__init__.py +18 -0
- synkro/formatters/chatml.py +121 -0
- synkro/formatters/langfuse.py +98 -0
- synkro/formatters/langsmith.py +98 -0
- synkro/formatters/qa.py +112 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +314 -0
- synkro/generation/golden_responses.py +269 -0
- synkro/generation/golden_scenarios.py +333 -0
- synkro/generation/golden_tool_responses.py +791 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +625 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +16 -0
- synkro/interactive/hitl_session.py +205 -0
- synkro/interactive/intent_classifier.py +94 -0
- synkro/interactive/logic_map_editor.py +176 -0
- synkro/interactive/rich_ui.py +459 -0
- synkro/interactive/scenario_editor.py +198 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +309 -0
- synkro/llm/rate_limits.py +99 -0
- synkro/models/__init__.py +50 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/local.py +104 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +13 -0
- synkro/modes/config.py +66 -0
- synkro/modes/conversation.py +35 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +769 -0
- synkro/pipelines.py +136 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +533 -0
- synkro/prompts/interactive_templates.py +198 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +318 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +464 -0
- synkro/schemas.py +521 -0
- synkro/types/__init__.py +43 -0
- synkro/types/core.py +153 -0
- synkro/types/dataset_type.py +33 -0
- synkro/types/logic_map.py +348 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.36.data/data/examples/__init__.py +148 -0
- synkro-0.4.36.dist-info/METADATA +507 -0
- synkro-0.4.36.dist-info/RECORD +81 -0
- synkro-0.4.36.dist-info/WHEEL +4 -0
- synkro-0.4.36.dist-info/entry_points.txt +2 -0
- synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Built-in example policies for instant demos."""
|
|
2
|
+
|
|
3
|
+
EXPENSE_POLICY = """# Company Expense Policy
|
|
4
|
+
|
|
5
|
+
## Approval Thresholds
|
|
6
|
+
- Expenses under $50: No approval required
|
|
7
|
+
- Expenses $50-$500: Manager approval required
|
|
8
|
+
- Expenses over $500: VP approval required
|
|
9
|
+
|
|
10
|
+
## Receipt Requirements
|
|
11
|
+
- All expenses over $25 must have a receipt
|
|
12
|
+
- Digital receipts are acceptable
|
|
13
|
+
- Missing receipts require written justification within 48 hours
|
|
14
|
+
|
|
15
|
+
## Categories
|
|
16
|
+
- Travel: Flights, hotels, ground transportation, meals while traveling
|
|
17
|
+
- Meals: Client meals, team events (max $75/person)
|
|
18
|
+
- Software: Must be on pre-approved list, exceptions need IT approval
|
|
19
|
+
- Equipment: Must be on asset tracking list if over $200
|
|
20
|
+
- Office Supplies: Under $100 can be purchased directly
|
|
21
|
+
|
|
22
|
+
## Reimbursement Timeline
|
|
23
|
+
- Submit expenses within 30 days of purchase
|
|
24
|
+
- Reimbursements processed within 14 business days
|
|
25
|
+
- Late submissions require manager exception approval
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
HR_HANDBOOK = """# Employee Handbook
|
|
29
|
+
|
|
30
|
+
## Work Hours
|
|
31
|
+
- Standard work week is 40 hours, Monday through Friday
|
|
32
|
+
- Core hours are 10am to 3pm when all employees should be available
|
|
33
|
+
- Flexible scheduling allowed with manager approval
|
|
34
|
+
|
|
35
|
+
## Time Off
|
|
36
|
+
- Full-time employees receive 15 days PTO per year
|
|
37
|
+
- PTO accrues monthly (1.25 days per month)
|
|
38
|
+
- Unused PTO can roll over up to 5 days
|
|
39
|
+
- PTO requests must be submitted 2 weeks in advance for 3+ days
|
|
40
|
+
|
|
41
|
+
## Remote Work
|
|
42
|
+
- Hybrid schedule: minimum 2 days in office per week
|
|
43
|
+
- Fully remote requires director approval
|
|
44
|
+
- Home office stipend of $500 for remote workers
|
|
45
|
+
|
|
46
|
+
## Performance Reviews
|
|
47
|
+
- Annual reviews conducted in December
|
|
48
|
+
- Mid-year check-ins in June
|
|
49
|
+
- Goals set at start of fiscal year
|
|
50
|
+
- Promotions considered during annual review cycle only
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
REFUND_POLICY = """# Return and Refund Policy
|
|
54
|
+
|
|
55
|
+
## Eligibility
|
|
56
|
+
- Items can be returned within 30 days of purchase
|
|
57
|
+
- Items must be unused and in original packaging
|
|
58
|
+
- Receipt or proof of purchase required
|
|
59
|
+
|
|
60
|
+
## Exceptions
|
|
61
|
+
- Final sale items cannot be returned
|
|
62
|
+
- Personalized items cannot be returned
|
|
63
|
+
- Perishable goods cannot be returned after 7 days
|
|
64
|
+
|
|
65
|
+
## Refund Process
|
|
66
|
+
- Refunds issued to original payment method
|
|
67
|
+
- Processing takes 5-10 business days
|
|
68
|
+
- Shipping costs are non-refundable unless item was defective
|
|
69
|
+
|
|
70
|
+
## Exchanges
|
|
71
|
+
- Exchanges available within 30 days
|
|
72
|
+
- Size exchanges free of charge
|
|
73
|
+
- Different item exchanges treated as return + new purchase
|
|
74
|
+
|
|
75
|
+
## Defective Items
|
|
76
|
+
- Report defects within 14 days
|
|
77
|
+
- Photos required for defect claims
|
|
78
|
+
- Replacement or full refund offered for confirmed defects
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
SUPPORT_GUIDELINES = """# Customer Support Guidelines
|
|
82
|
+
|
|
83
|
+
## Response Times
|
|
84
|
+
- Chat: Respond within 2 minutes
|
|
85
|
+
- Email: Respond within 4 hours during business hours
|
|
86
|
+
- Phone: Answer within 30 seconds, max hold time 3 minutes
|
|
87
|
+
|
|
88
|
+
## Escalation Tiers
|
|
89
|
+
- Tier 1: General questions, password resets, basic troubleshooting
|
|
90
|
+
- Tier 2: Technical issues, billing disputes, account problems
|
|
91
|
+
- Tier 3: Complex technical issues, executive escalations
|
|
92
|
+
|
|
93
|
+
## Refund Authority
|
|
94
|
+
- Tier 1 can issue refunds up to $50
|
|
95
|
+
- Tier 2 can issue refunds up to $200
|
|
96
|
+
- Tier 3 or manager approval needed for refunds over $200
|
|
97
|
+
|
|
98
|
+
## Documentation
|
|
99
|
+
- Log all customer interactions in CRM
|
|
100
|
+
- Include customer sentiment and issue category
|
|
101
|
+
- Note any promised follow-ups with deadlines
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
SECURITY_POLICY = """# Information Security Policy
|
|
105
|
+
|
|
106
|
+
## Password Requirements
|
|
107
|
+
- Minimum 12 characters
|
|
108
|
+
- Must include uppercase, lowercase, number, and symbol
|
|
109
|
+
- Change every 90 days
|
|
110
|
+
- Cannot reuse last 10 passwords
|
|
111
|
+
|
|
112
|
+
## Access Control
|
|
113
|
+
- Principle of least privilege applies
|
|
114
|
+
- Access requests require manager approval
|
|
115
|
+
- Quarterly access reviews mandatory
|
|
116
|
+
- Terminate access within 24 hours of employee departure
|
|
117
|
+
|
|
118
|
+
## Data Classification
|
|
119
|
+
- Public: Marketing materials, job postings
|
|
120
|
+
- Internal: Company announcements, policies
|
|
121
|
+
- Confidential: Customer data, financials
|
|
122
|
+
- Restricted: PII, payment info, credentials
|
|
123
|
+
|
|
124
|
+
## Incident Response
|
|
125
|
+
- Report security incidents within 1 hour
|
|
126
|
+
- Do not attempt to investigate independently
|
|
127
|
+
- Preserve evidence (don't delete logs or files)
|
|
128
|
+
- Security team leads all incident response
|
|
129
|
+
"""
|
|
130
|
+
|
|
131
|
+
# All policies available as a list
|
|
132
|
+
ALL_POLICIES = [
|
|
133
|
+
("expense", EXPENSE_POLICY),
|
|
134
|
+
("hr", HR_HANDBOOK),
|
|
135
|
+
("refund", REFUND_POLICY),
|
|
136
|
+
("support", SUPPORT_GUIDELINES),
|
|
137
|
+
("security", SECURITY_POLICY),
|
|
138
|
+
]
|
|
139
|
+
|
|
140
|
+
__all__ = [
|
|
141
|
+
"EXPENSE_POLICY",
|
|
142
|
+
"HR_HANDBOOK",
|
|
143
|
+
"REFUND_POLICY",
|
|
144
|
+
"SUPPORT_GUIDELINES",
|
|
145
|
+
"SECURITY_POLICY",
|
|
146
|
+
"ALL_POLICIES",
|
|
147
|
+
]
|
|
148
|
+
|
|
@@ -0,0 +1,507 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synkro
|
|
3
|
+
Version: 0.4.36
|
|
4
|
+
Summary: Generate training datasets from any document
|
|
5
|
+
Author: Murtaza Meerza
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: dataset-generation,fine-tuning,llm,synthetic-data,training-data
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
19
|
+
Requires-Dist: html2text>=2020.1
|
|
20
|
+
Requires-Dist: httpx>=0.25
|
|
21
|
+
Requires-Dist: litellm>=1.40
|
|
22
|
+
Requires-Dist: mammoth>=1.6
|
|
23
|
+
Requires-Dist: pydantic>=2.0
|
|
24
|
+
Requires-Dist: pymupdf>=1.24
|
|
25
|
+
Requires-Dist: rich>=13.0
|
|
26
|
+
Requires-Dist: typer>=0.9
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: ruff>=0.1; extra == 'dev'
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# Synkro
|
|
34
|
+
|
|
35
|
+
Turn policies, handbooks, and documentation into high-quality training data for fine-tuning LLMs.
|
|
36
|
+
|
|
37
|
+
## Features
|
|
38
|
+
|
|
39
|
+
- **Quality Evaluation** - Each response is graded and automatically refined if it fails
|
|
40
|
+
- **Multiple Formats** - Conversation (multi-turn), Instruction (single-turn), Evaluation (Q&A), and Tool Calling
|
|
41
|
+
- **Eval Platform Support** - Export to LangSmith, Langfuse, or generic Q&A format
|
|
42
|
+
- **Tool Call Training** - Generate OpenAI function calling format for teaching models to use custom tools
|
|
43
|
+
- **Top LLM Providers** - OpenAI, Anthropic, Google, and local models (Ollama, vLLM)
|
|
44
|
+
- **File Support** - PDF, DOCX, TXT, Markdown, URLs
|
|
45
|
+
- **CLI Included** - Generate datasets from the command line
|
|
46
|
+
- **Cost Tracking** - See total cost and LLM call breakdown after each generation
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install synkro
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Quick Start
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from synkro.pipelines import create_pipeline
|
|
58
|
+
from synkro.models.google import Google
|
|
59
|
+
from synkro.types import DatasetType
|
|
60
|
+
|
|
61
|
+
pipeline = create_pipeline(
|
|
62
|
+
model=Google.GEMINI_25_FLASH, # Fast generation
|
|
63
|
+
grading_model=Google.GEMINI_25_PRO, # Quality grading
|
|
64
|
+
dataset_type=DatasetType.CONVERSATION,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
dataset = pipeline.generate(
|
|
68
|
+
"All expenses over $50 require manager approval.",
|
|
69
|
+
traces=50,
|
|
70
|
+
)
|
|
71
|
+
dataset.save("training.jsonl")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### From Files
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from synkro.pipelines import create_pipeline
|
|
78
|
+
from synkro.core.policy import Policy
|
|
79
|
+
|
|
80
|
+
policy = Policy.from_file("handbook.pdf") # PDF, DOCX, TXT, MD
|
|
81
|
+
pipeline = create_pipeline()
|
|
82
|
+
dataset = pipeline.generate(policy, traces=100)
|
|
83
|
+
dataset.save()
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### From URLs
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
from synkro.core.policy import Policy
|
|
90
|
+
|
|
91
|
+
policy = Policy.from_url("https://example.com/terms")
|
|
92
|
+
dataset = pipeline.generate(policy)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Dataset Types
|
|
96
|
+
|
|
97
|
+
| Type | Turns | Output Formats | Best For |
|
|
98
|
+
|------|-------|----------------|----------|
|
|
99
|
+
| **CONVERSATION** | Multi | sft, chatml | Fine-tuning chat models |
|
|
100
|
+
| **INSTRUCTION** | 1 | sft, chatml | Instruction-following models |
|
|
101
|
+
| **EVALUATION** | 1 | qa, langsmith, langfuse | LLM evaluation & benchmarks |
|
|
102
|
+
| **TOOL_CALL** | Multi | tool_call, chatml | Teaching tool use |
|
|
103
|
+
|
|
104
|
+
### Conversation (Default)
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from synkro.types import DatasetType
|
|
108
|
+
|
|
109
|
+
pipeline = create_pipeline(dataset_type=DatasetType.CONVERSATION)
|
|
110
|
+
dataset = pipeline.generate(policy)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Output (multi-turn):
|
|
114
|
+
```json
|
|
115
|
+
{"messages": [
|
|
116
|
+
{"role": "user", "content": "What's the approval process for $350?"},
|
|
117
|
+
{"role": "assistant", "content": "For a $350 expense, you need manager approval..."},
|
|
118
|
+
{"role": "user", "content": "What if my manager is unavailable?"},
|
|
119
|
+
{"role": "assistant", "content": "You can request approval from..."}
|
|
120
|
+
]}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Instruction
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
pipeline = create_pipeline(dataset_type=DatasetType.INSTRUCTION)
|
|
127
|
+
dataset = pipeline.generate(policy)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Output (single-turn):
|
|
131
|
+
```json
|
|
132
|
+
{"messages": [
|
|
133
|
+
{"role": "user", "content": "What's the approval process for $350?"},
|
|
134
|
+
{"role": "assistant", "content": "For a $350 expense, you need manager approval. Submit the expense report with receipt..."}
|
|
135
|
+
]}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Evaluation
|
|
139
|
+
|
|
140
|
+
Generate Q&A datasets for LLM evaluation with ground truth:
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
pipeline = create_pipeline(dataset_type=DatasetType.EVALUATION)
|
|
144
|
+
dataset = pipeline.generate(policy, traces=50)
|
|
145
|
+
|
|
146
|
+
# Save in different formats
|
|
147
|
+
dataset.save("eval.jsonl", format="qa") # Generic Q&A
|
|
148
|
+
dataset.save("eval.jsonl", format="langsmith") # LangSmith format
|
|
149
|
+
dataset.save("eval.jsonl", format="langfuse") # Langfuse format
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Output (`format="qa"`):
|
|
153
|
+
```json
|
|
154
|
+
{
|
|
155
|
+
"question": "Can I submit a $200 expense without a receipt?",
|
|
156
|
+
"answer": "All expenses require receipts per policy...",
|
|
157
|
+
"expected_outcome": "Deny - missing receipt violates R003",
|
|
158
|
+
"ground_truth_rules": ["R003", "R005"],
|
|
159
|
+
"difficulty": "negative",
|
|
160
|
+
"category": "Receipt Requirements"
|
|
161
|
+
}
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Output (`format="langsmith"`):
|
|
165
|
+
```json
|
|
166
|
+
{
|
|
167
|
+
"inputs": {"question": "...", "context": "..."},
|
|
168
|
+
"outputs": {"answer": "..."},
|
|
169
|
+
"metadata": {"expected_outcome": "...", "ground_truth_rules": [...]}
|
|
170
|
+
}
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Output (`format="langfuse"`):
|
|
174
|
+
```json
|
|
175
|
+
{
|
|
176
|
+
"input": {"question": "...", "context": "..."},
|
|
177
|
+
"expectedOutput": {"answer": "...", "expected_outcome": "..."},
|
|
178
|
+
"metadata": {"ground_truth_rules": [...], "difficulty": "..."}
|
|
179
|
+
}
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Tool Calling
|
|
183
|
+
|
|
184
|
+
Generate training data for teaching models when and how to use your custom tools:
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from synkro import create_pipeline, ToolDefinition, DatasetType
|
|
188
|
+
|
|
189
|
+
# Define your tools
|
|
190
|
+
web_search = ToolDefinition(
|
|
191
|
+
name="web_search",
|
|
192
|
+
description="Search the web for current information",
|
|
193
|
+
parameters={
|
|
194
|
+
"type": "object",
|
|
195
|
+
"properties": {
|
|
196
|
+
"query": {"type": "string", "description": "Search query"}
|
|
197
|
+
},
|
|
198
|
+
"required": ["query"]
|
|
199
|
+
},
|
|
200
|
+
mock_responses=["NYC: 72°F, sunny", "BTC: $67,234"]
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
# Create pipeline with tools
|
|
204
|
+
pipeline = create_pipeline(
|
|
205
|
+
dataset_type=DatasetType.TOOL_CALL,
|
|
206
|
+
tools=[web_search],
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
# Generate from tool usage guidelines
|
|
210
|
+
dataset = pipeline.generate("""
|
|
211
|
+
Use web_search for real-time data like weather, prices.
|
|
212
|
+
Answer general questions directly without tools.
|
|
213
|
+
""", traces=20)
|
|
214
|
+
|
|
215
|
+
dataset.save("tool_training.jsonl", format="tool_call") # OpenAI format
|
|
216
|
+
dataset.save("tool_training.jsonl", format="chatml") # ChatML with XML tags
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
**Output Formats:**
|
|
220
|
+
|
|
221
|
+
OpenAI function calling (`format="tool_call"`):
|
|
222
|
+
```json
|
|
223
|
+
{"messages": [
|
|
224
|
+
{"role": "user", "content": "What's the weather in NYC?"},
|
|
225
|
+
{"role": "assistant", "content": null, "tool_calls": [
|
|
226
|
+
{"id": "call_abc", "type": "function", "function": {"name": "web_search", "arguments": "{\"query\": \"weather NYC\"}"}}
|
|
227
|
+
]},
|
|
228
|
+
{"role": "tool", "tool_call_id": "call_abc", "content": "NYC: 72°F, sunny"},
|
|
229
|
+
{"role": "assistant", "content": "The weather in NYC is 72°F and sunny."}
|
|
230
|
+
]}
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
ChatML with XML tags (`format="chatml"`):
|
|
234
|
+
```json
|
|
235
|
+
{"messages": [
|
|
236
|
+
{"role": "user", "content": "What's the weather in NYC?"},
|
|
237
|
+
{"role": "assistant", "content": "<tool_call>\n{\"name\": \"web_search\", \"arguments\": {\"query\": \"weather NYC\"}}\n</tool_call>"},
|
|
238
|
+
{"role": "tool", "content": "<tool_response>\nNYC: 72°F, sunny\n</tool_response>"},
|
|
239
|
+
{"role": "assistant", "content": "The weather in NYC is 72°F and sunny."}
|
|
240
|
+
]}
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## Evaluation & Grading
|
|
244
|
+
|
|
245
|
+
Every response is graded on policy compliance, citations, and reasoning. Failed responses are automatically refined, up to `max_iterations` times.
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
from synkro.pipelines import create_pipeline
|
|
249
|
+
from synkro.models.openai import OpenAI
|
|
250
|
+
|
|
251
|
+
pipeline = create_pipeline(
|
|
252
|
+
model=OpenAI.GPT_4O_MINI, # Fast generation
|
|
253
|
+
grading_model=OpenAI.GPT_4O, # Quality grading
|
|
254
|
+
max_iterations=3, # Refinement attempts
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
dataset = pipeline.generate(policy, traces=100)
|
|
258
|
+
|
|
259
|
+
# Check quality
|
|
260
|
+
print(f"Pass rate: {dataset.passing_rate:.1%}")
|
|
261
|
+
|
|
262
|
+
# Filter to only passing traces
|
|
263
|
+
high_quality = dataset.filter(passed=True)
|
|
264
|
+
high_quality.save("training.jsonl")
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
## Eval API
|
|
268
|
+
|
|
269
|
+
Generate test scenarios and grade your own model's responses against policy compliance.
|
|
270
|
+
|
|
271
|
+
```python
|
|
272
|
+
import synkro
|
|
273
|
+
|
|
274
|
+
# Generate scenarios with ground truth (no synthetic responses)
|
|
275
|
+
result = synkro.generate_scenarios(
|
|
276
|
+
policy="Expenses over $50 require manager approval...",
|
|
277
|
+
count=100,
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
# Each scenario has ground truth labels
|
|
281
|
+
for scenario in result.scenarios:
|
|
282
|
+
print(scenario.user_message) # "Can I expense a $200 dinner?"
|
|
283
|
+
print(scenario.expected_outcome) # "Requires manager approval per R001"
|
|
284
|
+
print(scenario.target_rule_ids) # ["R001", "R003"]
|
|
285
|
+
print(scenario.scenario_type) # "positive" | "negative" | "edge_case" | "irrelevant"
|
|
286
|
+
|
|
287
|
+
# Grade YOUR model's responses
|
|
288
|
+
for scenario in result.scenarios:
|
|
289
|
+
response = my_model(scenario.user_message) # Your model
|
|
290
|
+
grade = synkro.grade(response, scenario, policy)
|
|
291
|
+
|
|
292
|
+
if not grade.passed:
|
|
293
|
+
print(f"Failed: {grade.feedback}")
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### When to Use
|
|
297
|
+
|
|
298
|
+
| Use Case | API |
|
|
299
|
+
|----------|-----|
|
|
300
|
+
| Generate training data | `synkro.generate()` |
|
|
301
|
+
| Generate eval scenarios | `synkro.generate_scenarios()` |
|
|
302
|
+
| Grade external model | `synkro.grade()` |
|
|
303
|
+
|
|
304
|
+
### Scenario Types
|
|
305
|
+
|
|
306
|
+
Scenarios are generated with balanced coverage:
|
|
307
|
+
|
|
308
|
+
| Type | % | Description |
|
|
309
|
+
|------|---|-------------|
|
|
310
|
+
| `positive` | 35% | Happy path - user meets all criteria |
|
|
311
|
+
| `negative` | 30% | Violations - user fails one criterion |
|
|
312
|
+
| `edge_case` | 25% | Boundary conditions at exact limits |
|
|
313
|
+
| `irrelevant` | 10% | Outside policy scope |
|
|
314
|
+
|
|
315
|
+
### EvalScenario Fields
|
|
316
|
+
|
|
317
|
+
```python
|
|
318
|
+
scenario.user_message # The test input
|
|
319
|
+
scenario.expected_outcome # Ground truth behavior
|
|
320
|
+
scenario.target_rule_ids # Rules being tested
|
|
321
|
+
scenario.scenario_type # positive/negative/edge_case/irrelevant
|
|
322
|
+
scenario.category # Policy category
|
|
323
|
+
scenario.context # Additional context
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### Temperature
|
|
327
|
+
|
|
328
|
+
Use `temperature` to control output diversity:
|
|
329
|
+
|
|
330
|
+
```python
|
|
331
|
+
# High temp for diverse scenario coverage
|
|
332
|
+
result = synkro.generate_scenarios(policy, temperature=0.8)
|
|
333
|
+
|
|
334
|
+
# Low temp for deterministic training data
|
|
335
|
+
dataset = synkro.generate(policy, temperature=0.2)
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
## Cost & Performance
|
|
339
|
+
|
|
340
|
+
Approximate costs using Gemini 2.5 Flash (multi-turn conversations):
|
|
341
|
+
|
|
342
|
+
| Traces | LLM Calls | Time | Cost |
|
|
343
|
+
|--------|-----------|------|------|
|
|
344
|
+
| 100 | ~335 | ~13 min | ~$3 |
|
|
345
|
+
| 500 | ~1,675 | ~1 hour | ~$14 |
|
|
346
|
+
| 1000 | ~3,350 | ~2 hours | ~$28 |
|
|
347
|
+
|
|
348
|
+
*Based on ~3.3 LLM calls per trace (generation + grading) with max_iterations=3. Actual costs vary by policy complexity and turn count.*
|
|
349
|
+
|
|
350
|
+
## Local LLMs
|
|
351
|
+
|
|
352
|
+
Run with Ollama, vLLM, or any OpenAI-compatible endpoint:
|
|
353
|
+
|
|
354
|
+
```python
|
|
355
|
+
from synkro import create_pipeline
|
|
356
|
+
from synkro.models import Local
|
|
357
|
+
|
|
358
|
+
# Ollama
|
|
359
|
+
pipeline = create_pipeline(model=Local.OLLAMA("llama3.2"))
|
|
360
|
+
|
|
361
|
+
# vLLM
|
|
362
|
+
pipeline = create_pipeline(model=Local.VLLM("mistral-7b"))
|
|
363
|
+
|
|
364
|
+
# Custom endpoint
|
|
365
|
+
pipeline = create_pipeline(model=Local.CUSTOM("my-model", endpoint="http://localhost:8080"))
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
**CLI:**
|
|
369
|
+
```bash
|
|
370
|
+
synkro generate policy.pdf --provider ollama --model llama3.2
|
|
371
|
+
synkro generate policy.pdf --provider vllm --endpoint http://localhost:8000
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
## CLI
|
|
375
|
+
|
|
376
|
+
```bash
|
|
377
|
+
# From file
|
|
378
|
+
synkro generate policy.pdf --traces 50
|
|
379
|
+
|
|
380
|
+
# From text
|
|
381
|
+
synkro generate 'All expenses over $50 need approval' -n 20
|
|
382
|
+
|
|
383
|
+
# From URL
|
|
384
|
+
synkro generate https://example.com/policy -o training.jsonl
|
|
385
|
+
|
|
386
|
+
# Skip interactive mode
|
|
387
|
+
synkro generate policy.pdf --no-interactive
|
|
388
|
+
|
|
389
|
+
# Quick demo with built-in policy
|
|
390
|
+
synkro demo
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
**Options:**
|
|
394
|
+
- `--traces, -n` - Number of traces (default: 20)
|
|
395
|
+
- `--output, -o` - Output file path
|
|
396
|
+
- `--model, -m` - Model for generation
|
|
397
|
+
- `--format, -f` - Output format: `sft`, `qa`, `langsmith`, `langfuse`, `tool_call`, `chatml`
|
|
398
|
+
- `--provider, -p` - LLM provider for local models (`ollama`, `vllm`)
|
|
399
|
+
- `--endpoint, -e` - Custom API endpoint URL
|
|
400
|
+
- `--interactive/-i, --no-interactive/-I` - Review/edit extracted rules before generation (default: on)
|
|
401
|
+
|
|
402
|
+
## Interactive Mode
|
|
403
|
+
|
|
404
|
+
By default, synkro extracts policy rules into a Logic Map and lets you review/edit them before generation. The interactive session also shows the recommended conversation turns based on policy complexity:
|
|
405
|
+
|
|
406
|
+
```
|
|
407
|
+
╭─────────────────────────── Conversation Settings ────────────────────────────╮
|
|
408
|
+
│ Complexity: Conditional │
|
|
409
|
+
│ Turns: 3 │
|
|
410
|
+
╰──────────────────────────────────────────────────────────────────────────────╯
|
|
411
|
+
|
|
412
|
+
╭────────────────────────── 📜 Logic Map (3 rules) ────────────────────────────╮
|
|
413
|
+
│ ├── R001: Expenses over $50 require manager approval │
|
|
414
|
+
│ ├── R002: Client meals limited to $75/person │
|
|
415
|
+
│ └── R003: Receipts required for all expenses │
|
|
416
|
+
╰──────────────────────────────────────────────────────────────────────────────╯
|
|
417
|
+
|
|
418
|
+
Enter feedback: shorter conversations
|
|
419
|
+
✓ Set to 2 turns (User requested shorter/simpler conversations)
|
|
420
|
+
|
|
421
|
+
Enter feedback: add a rule for travel expenses
|
|
422
|
+
✓ Added R004: Travel expenses over $500 require VP approval
|
|
423
|
+
|
|
424
|
+
Enter feedback: done
|
|
425
|
+
✅ Session complete - 1 rule change(s), 2 turns
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
You can adjust both **conversation turns** and **rules** using natural language:
|
|
429
|
+
|
|
430
|
+
| Input | Action |
|
|
431
|
+
|-------|--------|
|
|
432
|
+
| `"shorter conversations"` | Reduce turns (1-2) |
|
|
433
|
+
| `"I want 5 turns"` | Set specific turn count |
|
|
434
|
+
| `"more thorough"` | Increase turns (5-6) |
|
|
435
|
+
| `"remove R002"` | Delete a rule |
|
|
436
|
+
| `"add a rule for..."` | Add new rule |
|
|
437
|
+
|
|
438
|
+
Commands: `done`, `undo`, `reset`, `show R001`, `help`
|
|
439
|
+
|
|
440
|
+
## Advanced Features
|
|
441
|
+
|
|
442
|
+
### Checkpointing
|
|
443
|
+
|
|
444
|
+
Resume interrupted generations:
|
|
445
|
+
|
|
446
|
+
```python
|
|
447
|
+
pipeline = create_pipeline(checkpoint_dir="./checkpoints")
|
|
448
|
+
dataset = pipeline.generate(policy, traces=100) # Resumes from checkpoint
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
### Dataset Operations
|
|
452
|
+
|
|
453
|
+
```python
|
|
454
|
+
# Filter by quality
|
|
455
|
+
high_quality = dataset.filter(passed=True)
|
|
456
|
+
|
|
457
|
+
# Remove duplicates
|
|
458
|
+
unique = dataset.dedupe(threshold=0.85)
|
|
459
|
+
|
|
460
|
+
# Check pass rate
|
|
461
|
+
print(f"Pass rate: {dataset.passing_rate:.1%}")
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
### Folder Loading
|
|
465
|
+
|
|
466
|
+
Generate from multiple documents at once:
|
|
467
|
+
|
|
468
|
+
```python
|
|
469
|
+
from synkro.core.policy import Policy
|
|
470
|
+
|
|
471
|
+
policy = Policy.from_file("policies/") # Loads all PDF, DOCX, TXT, MD files
|
|
472
|
+
dataset = pipeline.generate(policy, traces=100)
|
|
473
|
+
```
|
|
474
|
+
|
|
475
|
+
### Thinking Mode
|
|
476
|
+
|
|
477
|
+
Generate training data with explicit reasoning in `<think>` tags, compatible with Qwen3 and DeepSeek-R1:
|
|
478
|
+
|
|
479
|
+
```python
|
|
480
|
+
pipeline = create_pipeline(thinking=True)
|
|
481
|
+
dataset = pipeline.generate(policy, traces=50)
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
Output:
|
|
485
|
+
```json
|
|
486
|
+
{"messages": [
|
|
487
|
+
{"role": "user", "content": "Can I expense a $350 team dinner?"},
|
|
488
|
+
{"role": "assistant", "content": "<think>\nLet me check the expense policy...\n- Rule: Expenses over $50 require manager approval\n- $350 exceeds the $50 threshold\n- Manager approval is required\n</think>\n\nFor a $350 team dinner, you'll need manager approval since it exceeds the $50 threshold. Please submit your expense report with the receipt and request approval from your manager."}
|
|
489
|
+
]}
|
|
490
|
+
```
|
|
491
|
+
|
|
492
|
+
Works with the `CONVERSATION`, `INSTRUCTION`, and `TOOL_CALL` dataset types.
|
|
493
|
+
|
|
494
|
+
## Logic Map Inspection
|
|
495
|
+
|
|
496
|
+
Access the extracted rules programmatically:
|
|
497
|
+
|
|
498
|
+
```python
|
|
499
|
+
result = pipeline.generate(policy, traces=50, return_logic_map=True)
|
|
500
|
+
|
|
501
|
+
# Inspect extracted rules
|
|
502
|
+
for rule in result.logic_map.rules:
|
|
503
|
+
print(f"{rule.rule_id}: {rule.text}")
|
|
504
|
+
|
|
505
|
+
# Get the dataset
|
|
506
|
+
dataset = result.dataset
|
|
507
|
+
```
|