synkro 0.4.36__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synkro might be problematic.
- synkro/__init__.py +331 -0
- synkro/advanced.py +184 -0
- synkro/cli.py +156 -0
- synkro/core/__init__.py +7 -0
- synkro/core/checkpoint.py +250 -0
- synkro/core/dataset.py +432 -0
- synkro/core/policy.py +337 -0
- synkro/errors.py +178 -0
- synkro/examples/__init__.py +148 -0
- synkro/factory.py +291 -0
- synkro/formatters/__init__.py +18 -0
- synkro/formatters/chatml.py +121 -0
- synkro/formatters/langfuse.py +98 -0
- synkro/formatters/langsmith.py +98 -0
- synkro/formatters/qa.py +112 -0
- synkro/formatters/sft.py +90 -0
- synkro/formatters/tool_call.py +127 -0
- synkro/generation/__init__.py +9 -0
- synkro/generation/follow_ups.py +134 -0
- synkro/generation/generator.py +314 -0
- synkro/generation/golden_responses.py +269 -0
- synkro/generation/golden_scenarios.py +333 -0
- synkro/generation/golden_tool_responses.py +791 -0
- synkro/generation/logic_extractor.py +126 -0
- synkro/generation/multiturn_responses.py +177 -0
- synkro/generation/planner.py +131 -0
- synkro/generation/responses.py +189 -0
- synkro/generation/scenarios.py +90 -0
- synkro/generation/tool_responses.py +625 -0
- synkro/generation/tool_simulator.py +114 -0
- synkro/interactive/__init__.py +16 -0
- synkro/interactive/hitl_session.py +205 -0
- synkro/interactive/intent_classifier.py +94 -0
- synkro/interactive/logic_map_editor.py +176 -0
- synkro/interactive/rich_ui.py +459 -0
- synkro/interactive/scenario_editor.py +198 -0
- synkro/llm/__init__.py +7 -0
- synkro/llm/client.py +309 -0
- synkro/llm/rate_limits.py +99 -0
- synkro/models/__init__.py +50 -0
- synkro/models/anthropic.py +26 -0
- synkro/models/google.py +19 -0
- synkro/models/local.py +104 -0
- synkro/models/openai.py +31 -0
- synkro/modes/__init__.py +13 -0
- synkro/modes/config.py +66 -0
- synkro/modes/conversation.py +35 -0
- synkro/modes/tool_call.py +18 -0
- synkro/parsers.py +442 -0
- synkro/pipeline/__init__.py +20 -0
- synkro/pipeline/phases.py +592 -0
- synkro/pipeline/runner.py +769 -0
- synkro/pipelines.py +136 -0
- synkro/prompts/__init__.py +57 -0
- synkro/prompts/base.py +167 -0
- synkro/prompts/golden_templates.py +533 -0
- synkro/prompts/interactive_templates.py +198 -0
- synkro/prompts/multiturn_templates.py +156 -0
- synkro/prompts/templates.py +281 -0
- synkro/prompts/tool_templates.py +318 -0
- synkro/quality/__init__.py +14 -0
- synkro/quality/golden_refiner.py +163 -0
- synkro/quality/grader.py +153 -0
- synkro/quality/multiturn_grader.py +150 -0
- synkro/quality/refiner.py +137 -0
- synkro/quality/tool_grader.py +126 -0
- synkro/quality/tool_refiner.py +128 -0
- synkro/quality/verifier.py +228 -0
- synkro/reporting.py +464 -0
- synkro/schemas.py +521 -0
- synkro/types/__init__.py +43 -0
- synkro/types/core.py +153 -0
- synkro/types/dataset_type.py +33 -0
- synkro/types/logic_map.py +348 -0
- synkro/types/tool.py +94 -0
- synkro-0.4.36.data/data/examples/__init__.py +148 -0
- synkro-0.4.36.dist-info/METADATA +507 -0
- synkro-0.4.36.dist-info/RECORD +81 -0
- synkro-0.4.36.dist-info/WHEEL +4 -0
- synkro-0.4.36.dist-info/entry_points.txt +2 -0
- synkro-0.4.36.dist-info/licenses/LICENSE +21 -0
synkro/types/core.py
ADDED
@@ -0,0 +1,153 @@
"""Core Pydantic models for Synkro."""

from typing import Literal, Any
from pydantic import BaseModel, Field


Role = Literal["system", "user", "assistant", "tool"]


class Message(BaseModel):
    """
    A single message in a conversation.

    Supports both regular chat messages and tool-calling messages.

    Examples:
        >>> # Regular message
        >>> Message(role="user", content="Hello")

        >>> # Assistant with tool call (tool_calls is list of dicts or ToolCall objects)
        >>> Message(role="assistant", content=None, tool_calls=[...])

        >>> # Tool response
        >>> Message(role="tool", content="Result", tool_call_id="call_123")
    """

    role: Role
    content: str | None = None
    tool_calls: list[Any] | None = Field(
        default=None,
        description="Tool calls made by the assistant (list of ToolCall or dicts)"
    )
    tool_call_id: str | None = Field(
        default=None,
        description="ID of the tool call this message responds to (for tool role)"
    )

    def model_post_init(self, __context) -> None:
        """Validate message structure based on role."""
        # For backwards compatibility, ensure content is string for non-tool roles
        if self.role in ("system", "user") and self.content is None:
            self.content = ""


class Scenario(BaseModel):
    """A test scenario for trace generation."""

    description: str = Field(description="The scenario description")
    context: str = Field(description="Additional context and background")
    category: str | None = Field(default=None, description="Category this scenario belongs to")

    # Evaluation fields (populated from GoldenScenario)
    scenario_type: str | None = Field(default=None, description="Type: positive, negative, edge_case, irrelevant")
    target_rule_ids: list[str] | None = Field(default=None, description="Rule IDs this scenario tests")
    expected_outcome: str | None = Field(default=None, description="Expected behavior based on rules")


class GradeResult(BaseModel):
    """Result of grading a trace."""

    passed: bool = Field(description="Whether the trace passes quality checks")
    issues: list[str] = Field(default_factory=list, description="List of issues found")
    feedback: str = Field(default="", description="Summary feedback for improvement")


class Trace(BaseModel):
    """A complete training trace with messages and metadata."""

    messages: list[Message] = Field(description="The conversation messages")
    scenario: Scenario = Field(description="The scenario this trace was generated from")
    grade: GradeResult | None = Field(default=None, description="Grading result if graded")

    # Golden Trace metadata (for verification)
    reasoning_chain: list[Any] | None = Field(default=None, description="Chain-of-thought reasoning steps with rule citations")
    rules_applied: list[str] | None = Field(default=None, description="Rule IDs that were applied in the response")
    rules_excluded: list[str] | None = Field(default=None, description="Rule IDs that were explicitly excluded")

    @property
    def system_message(self) -> str | None:
        """Get the system message content."""
        for m in self.messages:
            if m.role == "system":
                return m.content
        return None

    @property
    def user_message(self) -> str:
        """Get the first user message content."""
        for m in self.messages:
            if m.role == "user":
                return m.content or ""
        return ""

    @property
    def assistant_message(self) -> str:
        """Get the last assistant message content."""
        for m in reversed(self.messages):
            if m.role == "assistant":
                return m.content or ""
        return ""

    @property
    def has_tool_calls(self) -> bool:
        """Check if this trace contains any tool calls."""
        for m in self.messages:
            if m.tool_calls:
                return True
        return False


class EvalScenario(BaseModel):
    """
    A scenario for evaluation with ground truth labels.

    Used by generate_scenarios() for eval dataset generation.
    Contains the test input and expected behavior, but no synthetic response.

    Examples:
        >>> scenarios = synkro.generate_scenarios(policy, count=100)
        >>> for s in scenarios:
        ...     response = my_model(s.user_message)
        ...     grade = synkro.grade(response, s, policy)
    """

    user_message: str = Field(description="The user's request or question (test input)")
    expected_outcome: str = Field(description="Expected behavior based on policy rules")
    target_rule_ids: list[str] = Field(default_factory=list, description="Rule IDs this scenario tests")
    scenario_type: str = Field(description="Type: positive, negative, edge_case, irrelevant")
    category: str = Field(default="", description="Policy category this scenario belongs to")
    context: str = Field(default="", description="Additional context for the scenario")


class Category(BaseModel):
    """A category for organizing scenarios."""

    name: str = Field(description="Category name")
    description: str = Field(description="What this category tests")
    count: int = Field(description="Number of traces to generate for this category")


class Plan(BaseModel):
    """A generation plan with categories and complexity analysis."""

    categories: list[Category] = Field(description="Categories with trace allocations")
    reasoning: str = Field(description="Explanation of why these categories were chosen")
    recommended_turns: int = Field(
        default=1,
        description="Recommended conversation turns based on policy complexity"
    )
    complexity_level: Literal["simple", "conditional", "complex"] = Field(
        default="simple",
        description="Policy complexity level: simple (1-2 turns), conditional (3 turns), complex (5+ turns)"
    )
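For orientation, here is a minimal usage sketch built against the models above. It is not part of the package; it assumes only what the diff shows (the synkro.types.core module and its fields) and demonstrates how Trace's convenience properties pick messages out of a conversation.

# Sketch only - assumes synkro.types.core is importable as diffed above.
from synkro.types.core import Message, Scenario, Trace

trace = Trace(
    messages=[
        Message(role="system", content="You are a support agent."),
        Message(role="user", content="Can I get a refund?"),
        Message(role="assistant", content="Yes, within 30 days of purchase."),
    ],
    scenario=Scenario(description="Refund request", context="Purchased 10 days ago"),
)

print(trace.user_message)       # first user turn: "Can I get a refund?"
print(trace.assistant_message)  # last assistant turn
print(trace.has_tool_calls)     # False - no message carries tool_calls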
synkro/types/dataset_type.py
ADDED
@@ -0,0 +1,33 @@
"""Dataset type enum for steering generation pipeline."""

from enum import Enum


class DatasetType(str, Enum):
    """
    Type of dataset to generate.

    The dataset type determines:
    - Prompts used for scenario and response generation
    - Conversation turns (INSTRUCTION/EVALUATION forces 1 turn)
    - Output format and schema

    Examples:
        >>> from synkro import DatasetType
        >>> synkro.generate(policy, dataset_type=DatasetType.CONVERSATION)  # Multi-turn
        >>> synkro.generate(policy, dataset_type=DatasetType.INSTRUCTION)  # Single-turn
        >>> synkro.generate(policy, dataset_type=DatasetType.EVALUATION)  # Q&A with ground truth
        >>> synkro.generate(policy, dataset_type=DatasetType.TOOL_CALL, tools=[...])
    """

    CONVERSATION = "conversation"
    """Multi-turn conversation: {messages: [{role, content}, ...]} with multiple exchanges"""

    INSTRUCTION = "instruction"
    """Single-turn instruction-following: {messages: [{role: "user"}, {role: "assistant"}]}"""

    EVALUATION = "evaluation"
    """Q&A evaluation dataset: {question, answer, expected_answer, ground_truth_rules, difficulty}"""

    TOOL_CALL = "tool_call"
    """Tool Calling: {messages: [..., {tool_calls: [...]}, {role: tool}, ...]}"""
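A short behavioral sketch (not from the package): because DatasetType subclasses str, members parse from and compare against plain strings, which is convenient when the value comes from a config file.

# Sketch only - DatasetType is a str-valued Enum, per the diff above.
from synkro.types.dataset_type import DatasetType

dt = DatasetType("tool_call")      # parse from a config string
assert dt is DatasetType.TOOL_CALL
assert dt == "tool_call"           # str subclass: equal to its raw value
forces_single_turn = dt in (DatasetType.INSTRUCTION, DatasetType.EVALUATION)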
synkro/types/logic_map.py
ADDED
@@ -0,0 +1,348 @@
"""Logic Map types for Golden Trace generation.

The Logic Map represents a policy as a directed acyclic graph (DAG) of rules,
enabling grounded reasoning and verification of generated traces.
"""

import json
from enum import Enum
from pathlib import Path
from typing import Literal

from pydantic import BaseModel, Field


class ScenarioType(str, Enum):
    """Types of scenarios for balanced dataset generation."""

    POSITIVE = "positive"      # Happy path - user meets all criteria
    NEGATIVE = "negative"      # Violation - user fails one criterion
    EDGE_CASE = "edge_case"    # Boundary - user at exact limit
    IRRELEVANT = "irrelevant"  # Not covered by policy


class RuleCategory(str, Enum):
    """Categories of rules extracted from policy."""

    CONSTRAINT = "constraint"  # Must/must not conditions
    PERMISSION = "permission"  # Allowed/can do
    PROCEDURE = "procedure"    # Step-by-step processes
    EXCEPTION = "exception"    # Special cases/overrides


class Rule(BaseModel):
    """
    A single rule extracted from the policy document.

    Rules form nodes in the Logic Map DAG, with dependencies
    indicating which rules must be evaluated first.

    Examples:
        >>> rule = Rule(
        ...     rule_id="R001",
        ...     text="Refunds are allowed within 30 days of purchase",
        ...     condition="purchase date is within 30 days",
        ...     action="allow refund",
        ...     dependencies=[],
        ...     category=RuleCategory.PERMISSION,
        ... )
    """

    rule_id: str = Field(
        description="Unique identifier (e.g., 'R001', 'R002')"
    )
    text: str = Field(
        description="Exact rule text from the policy"
    )
    condition: str = Field(
        description="The 'if' part - when this rule applies"
    )
    action: str = Field(
        description="The 'then' part - what happens when rule applies"
    )
    dependencies: list[str] = Field(
        default_factory=list,
        description="Rule IDs that must be evaluated before this rule"
    )
    category: RuleCategory = Field(
        description="Type of rule (constraint, permission, procedure, exception)"
    )

    def __hash__(self) -> int:
        return hash(self.rule_id)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Rule):
            return False
        return self.rule_id == other.rule_id


class LogicMap(BaseModel):
    """
    Directed Acyclic Graph (DAG) of rules extracted from a policy.

    The Logic Map is the "Map of Truth" that enables:
    - Grounded scenario generation with rule references
    - Chain-of-thought reasoning with rule citations
    - Verification that traces don't skip or hallucinate rules

    Examples:
        >>> logic_map = LogicMap(
        ...     rules=[rule1, rule2, rule3],
        ...     root_rules=["R001"],  # Entry points
        ... )
        >>> print(logic_map.get_rule("R001"))
    """

    rules: list[Rule] = Field(
        description="All rules extracted from the policy"
    )
    root_rules: list[str] = Field(
        default_factory=list,
        description="Rule IDs with no dependencies (entry points)"
    )

    def get_rule(self, rule_id: str) -> Rule | None:
        """Get a rule by its ID."""
        for rule in self.rules:
            if rule.rule_id == rule_id:
                return rule
        return None

    def get_dependents(self, rule_id: str) -> list[Rule]:
        """Get all rules that depend on the given rule."""
        return [r for r in self.rules if rule_id in r.dependencies]

    def get_dependencies(self, rule_id: str) -> list[Rule]:
        """Get all rules that the given rule depends on."""
        rule = self.get_rule(rule_id)
        if not rule:
            return []
        return [r for r in self.rules if r.rule_id in rule.dependencies]

    def get_chain(self, rule_id: str) -> list[Rule]:
        """
        Get the full dependency chain for a rule (topologically sorted).

        Returns all rules that must be evaluated before the given rule,
        in the order they should be evaluated.
        """
        visited = set()
        chain = []

        def visit(rid: str):
            if rid in visited:
                return
            visited.add(rid)
            rule = self.get_rule(rid)
            if rule:
                for dep_id in rule.dependencies:
                    visit(dep_id)
                chain.append(rule)

        visit(rule_id)
        return chain

    def validate_dag(self) -> bool:
        """Verify the rules form a valid DAG (no cycles)."""
        # Track visit state: 0=unvisited, 1=visiting, 2=visited
        state = {r.rule_id: 0 for r in self.rules}

        def has_cycle(rule_id: str) -> bool:
            if state.get(rule_id, 0) == 1:  # Currently visiting = cycle
                return True
            if state.get(rule_id, 0) == 2:  # Already visited = ok
                return False

            state[rule_id] = 1  # Mark as visiting
            rule = self.get_rule(rule_id)
            if rule:
                for dep_id in rule.dependencies:
                    if has_cycle(dep_id):
                        return True
            state[rule_id] = 2  # Mark as visited
            return False

        for rule in self.rules:
            if has_cycle(rule.rule_id):
                return False
        return True

    def get_rules_by_category(self, category: RuleCategory) -> list[Rule]:
        """Get all rules of a specific category."""
        return [r for r in self.rules if r.category == category]

    def to_display_string(self) -> str:
        """Generate a human-readable representation of the Logic Map."""
        lines = [f"Logic Map ({len(self.rules)} rules)"]
        lines.append("=" * 40)

        # Show root rules first
        lines.append("\nRoot Rules (Entry Points):")
        for rid in self.root_rules:
            rule = self.get_rule(rid)
            if rule:
                lines.append(f"  {rid}: {rule.text[:60]}...")

        # Show dependency chains
        lines.append("\nDependency Chains:")
        processed = set()
        for rule in self.rules:
            if rule.rule_id not in processed and rule.dependencies:
                chain = " -> ".join(r.rule_id for r in self.get_chain(rule.rule_id))
                lines.append(f"  {chain}")
                processed.update(r.rule_id for r in self.get_chain(rule.rule_id))

        return "\n".join(lines)

    def save(self, path: str | Path) -> None:
        """
        Save the Logic Map to a JSON file.

        Args:
            path: File path to save to (e.g., "logic_map.json")

        Examples:
            >>> logic_map.save("logic_map.json")
            >>> # Later, reload it
            >>> logic_map = LogicMap.load("logic_map.json")
        """
        path = Path(path)
        with open(path, "w") as f:
            json.dump(self.model_dump(), f, indent=2)

    @classmethod
    def load(cls, path: str | Path) -> "LogicMap":
        """
        Load a Logic Map from a JSON file.

        Args:
            path: File path to load from

        Returns:
            LogicMap instance

        Examples:
            >>> logic_map = LogicMap.load("logic_map.json")
            >>> print(f"Loaded {len(logic_map.rules)} rules")
        """
        path = Path(path)
        with open(path) as f:
            data = json.load(f)
        return cls.model_validate(data)


class ReasoningStep(BaseModel):
    """
    A single step in the Chain-of-Thought reasoning.

    Each step references exactly one rule and explains how it applies
    (or doesn't apply) to the current scenario.
    """

    rule_id: str = Field(
        description="The rule being evaluated in this step"
    )
    rule_text: str = Field(
        description="The text of the rule"
    )
    applies: bool = Field(
        description="Whether this rule applies to the scenario"
    )
    reasoning: str = Field(
        description="Explanation of why the rule does/doesn't apply"
    )
    exclusions: list[str] = Field(
        default_factory=list,
        description="Rule IDs that are excluded because this rule applies"
    )


class GoldenScenario(BaseModel):
    """
    A scenario with explicit type and rule targeting.

    Extends the base Scenario concept with:
    - Explicit scenario type (positive, negative, edge_case, irrelevant)
    - Target rule IDs that this scenario is designed to test
    - Expected outcome based on the rules
    """

    description: str = Field(
        description="The user's request or question"
    )
    context: str = Field(
        default="",
        description="Additional context for the scenario"
    )
    category: str = Field(
        default="",
        description="The policy category this scenario belongs to"
    )
    scenario_type: ScenarioType = Field(
        description="Type of scenario (positive, negative, edge_case, irrelevant)"
    )
    target_rule_ids: list[str] = Field(
        default_factory=list,
        description="Rule IDs this scenario is designed to test"
    )
    expected_outcome: str = Field(
        default="",
        description="Expected response behavior based on rules"
    )

    def to_base_scenario(self) -> "Scenario":
        """Convert to base Scenario type for compatibility, preserving eval fields."""
        from synkro.types.core import Scenario
        return Scenario(
            description=self.description,
            context=self.context,
            category=self.category,
            scenario_type=self.scenario_type.value if self.scenario_type else None,
            target_rule_ids=self.target_rule_ids,
            expected_outcome=self.expected_outcome,
        )


class VerificationResult(BaseModel):
    """
    Result of verifying a trace against the Logic Map.

    The Auditor produces this to indicate whether a trace
    correctly applies all relevant rules without hallucination.
    """

    passed: bool = Field(
        description="Whether the trace passed verification"
    )
    issues: list[str] = Field(
        default_factory=list,
        description="List of issues found (if any)"
    )
    skipped_rules: list[str] = Field(
        default_factory=list,
        description="Rule IDs that should have been applied but weren't"
    )
    hallucinated_rules: list[str] = Field(
        default_factory=list,
        description="Rule IDs cited that don't exist or don't apply"
    )
    contradictions: list[str] = Field(
        default_factory=list,
        description="Logical contradictions found in the trace"
    )
    rules_verified: list[str] = Field(
        default_factory=list,
        description="Rule IDs that were correctly applied"
    )


__all__ = [
    "ScenarioType",
    "RuleCategory",
    "Rule",
    "LogicMap",
    "ReasoningStep",
    "GoldenScenario",
    "VerificationResult",
]
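A small worked example (not from the package) exercising the DAG helpers above: two rules where R002 depends on R001, so get_chain returns them in evaluation order and validate_dag confirms there is no cycle.

# Sketch only - built against the Rule/LogicMap models in this diff.
from synkro.types.logic_map import LogicMap, Rule, RuleCategory

r1 = Rule(rule_id="R001", text="Refunds are allowed within 30 days of purchase",
          condition="purchase date is within 30 days", action="allow refund",
          category=RuleCategory.PERMISSION)
r2 = Rule(rule_id="R002", text="Refunds over $500 require manager approval",
          condition="refund amount exceeds $500", action="require approval",
          dependencies=["R001"], category=RuleCategory.CONSTRAINT)

lm = LogicMap(rules=[r1, r2], root_rules=["R001"])
assert lm.validate_dag()  # no cycles
assert [r.rule_id for r in lm.get_chain("R002")] == ["R001", "R002"]
print(lm.to_display_string())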
synkro/types/tool.py
ADDED
@@ -0,0 +1,94 @@
"""Tool-related types for tool call trace generation."""

from pydantic import BaseModel, Field


class ToolFunction(BaseModel):
    """Function details within a tool call."""

    name: str = Field(description="Name of the function to call")
    arguments: str = Field(description="JSON string of function arguments")


class ToolCall(BaseModel):
    """A tool call made by the assistant."""

    id: str = Field(description="Unique identifier for this tool call")
    type: str = Field(default="function", description="Type of tool call")
    function: ToolFunction = Field(description="Function details")


class ToolResult(BaseModel):
    """Result from a tool execution."""

    tool_call_id: str = Field(description="ID of the tool call this responds to")
    content: str = Field(description="The tool's response content")


class ToolDefinition(BaseModel):
    """
    Definition of a tool that an agent can use.

    Examples:
        >>> web_search = ToolDefinition(
        ...     name="web_search",
        ...     description="Search the web for current information",
        ...     parameters={
        ...         "type": "object",
        ...         "properties": {
        ...             "query": {"type": "string", "description": "Search query"}
        ...         },
        ...         "required": ["query"]
        ...     },
        ...     examples=[{"query": "weather in NYC"}],
        ...     mock_responses=["NYC: 72°F, sunny"]
        ... )
    """

    name: str = Field(description="Name of the tool")
    description: str = Field(description="What the tool does")
    parameters: dict = Field(
        description="JSON Schema for the tool's parameters",
        default_factory=lambda: {"type": "object", "properties": {}}
    )
    examples: list[dict] = Field(
        default_factory=list,
        description="Example tool calls for few-shot learning"
    )
    mock_responses: list[str] = Field(
        default_factory=list,
        description="Example responses for simulation"
    )

    def to_openai_format(self) -> dict:
        """Convert to OpenAI function calling format."""
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": self.parameters,
            }
        }

    def to_system_prompt(self) -> str:
        """Generate a system prompt description of this tool."""
        params_desc = []
        props = self.parameters.get("properties", {})
        required = self.parameters.get("required", [])

        for param_name, param_info in props.items():
            param_type = param_info.get("type", "any")
            param_desc = param_info.get("description", "")
            req_marker = " (required)" if param_name in required else ""
            params_desc.append(f"  - {param_name}: {param_type}{req_marker} - {param_desc}")

        params_str = "\n".join(params_desc) if params_desc else "  (no parameters)"

        return f"""**{self.name}**: {self.description}
Parameters:
{params_str}"""


__all__ = ["ToolDefinition", "ToolCall", "ToolFunction", "ToolResult"]
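Finally, a usage sketch (not from the package) that feeds the web_search definition from the docstring above through both converters: to_openai_format() yields the OpenAI function-calling dict, and to_system_prompt() yields a human-readable parameter listing.

# Sketch only - reuses the web_search example from the docstring above.
from synkro.types.tool import ToolDefinition

web_search = ToolDefinition(
    name="web_search",
    description="Search the web for current information",
    parameters={
        "type": "object",
        "properties": {"query": {"type": "string", "description": "Search query"}},
        "required": ["query"],
    },
)

print(web_search.to_openai_format())  # {"type": "function", "function": {...}}
print(web_search.to_system_prompt())  # lists "query: string (required) - Search query"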