tactus 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tactus/__init__.py +1 -1
- tactus/adapters/__init__.py +18 -1
- tactus/adapters/broker_log.py +127 -34
- tactus/adapters/channels/__init__.py +153 -0
- tactus/adapters/channels/base.py +174 -0
- tactus/adapters/channels/broker.py +179 -0
- tactus/adapters/channels/cli.py +448 -0
- tactus/adapters/channels/host.py +225 -0
- tactus/adapters/channels/ipc.py +297 -0
- tactus/adapters/channels/sse.py +305 -0
- tactus/adapters/cli_hitl.py +223 -1
- tactus/adapters/control_loop.py +879 -0
- tactus/adapters/file_storage.py +35 -2
- tactus/adapters/ide_log.py +7 -1
- tactus/backends/http_backend.py +0 -1
- tactus/broker/client.py +31 -1
- tactus/broker/server.py +416 -92
- tactus/cli/app.py +270 -7
- tactus/cli/control.py +393 -0
- tactus/core/config_manager.py +33 -6
- tactus/core/dsl_stubs.py +102 -18
- tactus/core/execution_context.py +265 -8
- tactus/core/lua_sandbox.py +8 -9
- tactus/core/registry.py +19 -2
- tactus/core/runtime.py +235 -27
- tactus/docker/Dockerfile.pypi +49 -0
- tactus/docs/__init__.py +33 -0
- tactus/docs/extractor.py +326 -0
- tactus/docs/html_renderer.py +72 -0
- tactus/docs/models.py +121 -0
- tactus/docs/templates/base.html +204 -0
- tactus/docs/templates/index.html +58 -0
- tactus/docs/templates/module.html +96 -0
- tactus/dspy/agent.py +382 -22
- tactus/dspy/broker_lm.py +57 -6
- tactus/dspy/config.py +14 -3
- tactus/dspy/history.py +2 -1
- tactus/dspy/module.py +136 -11
- tactus/dspy/signature.py +0 -1
- tactus/ide/server.py +300 -9
- tactus/primitives/human.py +619 -47
- tactus/primitives/system.py +0 -1
- tactus/protocols/__init__.py +25 -0
- tactus/protocols/control.py +427 -0
- tactus/protocols/notification.py +207 -0
- tactus/sandbox/container_runner.py +79 -11
- tactus/sandbox/docker_manager.py +23 -0
- tactus/sandbox/entrypoint.py +26 -0
- tactus/sandbox/protocol.py +3 -0
- tactus/stdlib/README.md +77 -0
- tactus/stdlib/__init__.py +27 -1
- tactus/stdlib/classify/__init__.py +165 -0
- tactus/stdlib/classify/classify.spec.tac +195 -0
- tactus/stdlib/classify/classify.tac +257 -0
- tactus/stdlib/classify/fuzzy.py +282 -0
- tactus/stdlib/classify/llm.py +319 -0
- tactus/stdlib/classify/primitive.py +287 -0
- tactus/stdlib/core/__init__.py +57 -0
- tactus/stdlib/core/base.py +320 -0
- tactus/stdlib/core/confidence.py +211 -0
- tactus/stdlib/core/models.py +161 -0
- tactus/stdlib/core/retry.py +171 -0
- tactus/stdlib/core/validation.py +274 -0
- tactus/stdlib/extract/__init__.py +125 -0
- tactus/stdlib/extract/llm.py +330 -0
- tactus/stdlib/extract/primitive.py +256 -0
- tactus/stdlib/tac/tactus/classify/base.tac +51 -0
- tactus/stdlib/tac/tactus/classify/fuzzy.tac +87 -0
- tactus/stdlib/tac/tactus/classify/index.md +77 -0
- tactus/stdlib/tac/tactus/classify/init.tac +29 -0
- tactus/stdlib/tac/tactus/classify/llm.tac +150 -0
- tactus/stdlib/tac/tactus/classify.spec.tac +191 -0
- tactus/stdlib/tac/tactus/extract/base.tac +138 -0
- tactus/stdlib/tac/tactus/extract/index.md +96 -0
- tactus/stdlib/tac/tactus/extract/init.tac +27 -0
- tactus/stdlib/tac/tactus/extract/llm.tac +201 -0
- tactus/stdlib/tac/tactus/extract.spec.tac +153 -0
- tactus/stdlib/tac/tactus/generate/base.tac +142 -0
- tactus/stdlib/tac/tactus/generate/index.md +195 -0
- tactus/stdlib/tac/tactus/generate/init.tac +28 -0
- tactus/stdlib/tac/tactus/generate/llm.tac +169 -0
- tactus/stdlib/tac/tactus/generate.spec.tac +210 -0
- tactus/testing/behave_integration.py +171 -7
- tactus/testing/context.py +0 -1
- tactus/testing/evaluation_runner.py +0 -1
- tactus/testing/gherkin_parser.py +0 -1
- tactus/testing/mock_hitl.py +0 -1
- tactus/testing/mock_tools.py +0 -1
- tactus/testing/models.py +0 -1
- tactus/testing/steps/builtin.py +0 -1
- tactus/testing/steps/custom.py +81 -22
- tactus/testing/steps/registry.py +0 -1
- tactus/testing/test_runner.py +7 -1
- tactus/validation/semantic_visitor.py +11 -5
- tactus/validation/validator.py +0 -1
- {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/METADATA +14 -2
- {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/RECORD +100 -49
- {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/WHEEL +0 -0
- {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/entry_points.txt +0 -0
- {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FuzzyMatchClassifier - Classification using string similarity.
|
|
3
|
+
|
|
4
|
+
This classifier uses fuzzy string matching to classify text based on
|
|
5
|
+
similarity to expected values. Useful for verification tasks where
|
|
6
|
+
you want to check if a response matches an expected value.
|
|
7
|
+
|
|
8
|
+
Supports multiple algorithms from rapidfuzz library:
|
|
9
|
+
- ratio: Basic character-level similarity (default)
|
|
10
|
+
- token_set_ratio: Tokenize and compare unique words (handles reordering)
|
|
11
|
+
- token_sort_ratio: Sort tokens before comparing
|
|
12
|
+
- partial_ratio: Best substring match
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Any, Dict, List, Optional
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from rapidfuzz import fuzz
|
|
20
|
+
|
|
21
|
+
HAS_RAPIDFUZZ = True
|
|
22
|
+
except ImportError:
|
|
23
|
+
from difflib import SequenceMatcher
|
|
24
|
+
|
|
25
|
+
HAS_RAPIDFUZZ = False
|
|
26
|
+
|
|
27
|
+
from ..core.base import BaseClassifier
|
|
28
|
+
from ..core.models import ClassifierResult
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def calculate_similarity(s1: str, s2: str, algorithm: str = "ratio") -> float:
    """
    Compute a normalized similarity score for two strings.

    Args:
        s1: First string
        s2: Second string
        algorithm: One of "ratio", "token_set_ratio", "token_sort_ratio", "partial_ratio"

    Returns:
        Float between 0.0 (no similarity) and 1.0 (identical)

    Raises:
        ValueError: If algorithm is not supported

    Note:
        Uses rapidfuzz if available (faster), falls back to difflib for basic ratio.
    """
    # An empty operand can never match anything.
    if not s1 or not s2:
        return 0.0

    # Case- and surrounding-whitespace-insensitive comparison.
    left = s1.lower().strip()
    right = s2.lower().strip()

    if not HAS_RAPIDFUZZ:
        # difflib fallback supports only the basic ratio algorithm.
        if algorithm != "ratio":
            raise ValueError(
                f"Algorithm '{algorithm}' requires rapidfuzz library. "
                "Install with: pip install rapidfuzz"
            )
        return SequenceMatcher(None, left, right).ratio()

    # rapidfuzz path (C++ backend): dispatch table instead of an if/elif chain.
    scorers = {
        "ratio": fuzz.ratio,
        "token_set_ratio": fuzz.token_set_ratio,
        "token_sort_ratio": fuzz.token_sort_ratio,
        "partial_ratio": fuzz.partial_ratio,
    }
    if algorithm not in scorers:
        raise ValueError(
            f"Unsupported algorithm: {algorithm}. "
            "Choose from: ratio, token_set_ratio, token_sort_ratio, partial_ratio"
        )
    # rapidfuzz reports 0-100; normalize to 0.0-1.0.
    return scorers[algorithm](left, right) / 100.0
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class FuzzyMatchClassifier(BaseClassifier):
    """
    Classifier backed by fuzzy string similarity.

    Compares input text against expected value(s) and reports whether the
    similarity clears a configurable threshold.

    Two modes of operation:

    1. Binary mode (single expected value):
       Returns "Yes" if similarity >= threshold, "No" otherwise.

           classifier = FuzzyMatchClassifier(
               expected="Customer Service",
               threshold=0.8,
           )
           result = classifier.classify("customer service dept")
           # result.value = "Yes", result.confidence = 0.92

    2. Multi-class mode (multiple expected values):
       Returns the closest matching class if similarity >= threshold,
       or "NO_MATCH" if nothing matches.

           classifier = FuzzyMatchClassifier(
               classes=["Technical Support", "Billing", "Sales"],
               threshold=0.7,
           )
           result = classifier.classify("tech support")
           # result.value = "Technical Support", result.confidence = 0.85

    Example usage in Lua:
        -- Binary: Does this match "Customer Service"?
        result = Classify {
            method = "fuzzy",
            expected = "Customer Service",
            threshold = 0.8,
            input = agent_response
        }

        -- Multi-class: Which department?
        result = Classify {
            method = "fuzzy",
            classes = {"Technical Support", "Billing", "Sales"},
            threshold = 0.7,
            input = department_name
        }
    """

    def __init__(
        self,
        classes: Optional[List[str]] = None,
        expected: Optional[str] = None,
        threshold: float = 0.8,
        algorithm: str = "ratio",
        target_classes: Optional[List[str]] = None,
        name: Optional[str] = None,
        # Accepted for factory compatibility; values are folded in below.
        config: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize FuzzyMatchClassifier.

        Args:
            classes: List of possible values to match against (multi-class mode)
            expected: Single expected value (binary mode, returns Yes/No)
            threshold: Minimum similarity score to consider a match (0.0 to 1.0)
            algorithm: Similarity algorithm - "ratio" (default), "token_set_ratio",
                "token_sort_ratio", or "partial_ratio"
            target_classes: Classes considered "positive" for precision/recall
            name: Optional name for this classifier

        Algorithm details:
            - ratio: Character-level similarity (best for exact matches)
            - token_set_ratio: Tokenizes and compares unique words (handles reordering)
            - token_sort_ratio: Sorts tokens before comparing (handles reordering)
            - partial_ratio: Best substring match (good for abbreviations)
        """
        # A config dict, when supplied, overrides the individual kwargs.
        if config is not None:
            classes = config.get("classes", classes)
            expected = config.get("expected", expected)
            threshold = config.get("threshold", threshold)
            algorithm = config.get("algorithm", algorithm)
            target_classes = config.get("target_classes", target_classes)
            name = config.get("name", name)

        self.threshold = threshold
        self.algorithm = algorithm
        self.name = name

        # Select operating mode from the supplied arguments.
        if expected is not None:
            # Binary: answer "Yes"/"No" against a single expected value.
            self.mode = "binary"
            self.expected = expected
            self.classes = ["Yes", "No"]
            self.target_classes = target_classes or ["Yes"]
        elif classes:
            # Multi-class: answer the closest class (or "NO_MATCH").
            self.mode = "multiclass"
            self.expected = None
            self.classes = list(classes)
            self.target_classes = target_classes or []
        else:
            raise ValueError(
                "FuzzyMatchClassifier requires either 'expected' (binary mode) "
                "or 'classes' (multi-class mode)"
            )

        # Number of classify() invocations so far.
        self.total_calls = 0

    def classify(self, input_text: str) -> ClassifierResult:
        """
        Classify the input text using fuzzy string matching.

        Args:
            input_text: The text to classify

        Returns:
            ClassifierResult with value, confidence (similarity score), explanation
        """
        self.total_calls += 1
        handler = self._classify_binary if self.mode == "binary" else self._classify_multiclass
        return handler(input_text)

    def _classify_binary(self, input_text: str) -> ClassifierResult:
        """
        Binary classification: does the input match the expected value?

        Returns "Yes" or "No" with the similarity as confidence; matched_text
        carries the expected value on a match for consistency.
        """
        score = calculate_similarity(input_text, self.expected, self.algorithm)

        if score >= self.threshold:
            return ClassifierResult(
                value="Yes",
                confidence=score,
                matched_text=self.expected,  # what the input matched against
                explanation=f"Input matches expected value with {score:.1%} similarity using {self.algorithm} (threshold: {self.threshold:.1%})",
            )
        return ClassifierResult(
            value="No",
            confidence=1.0 - score,  # confidence that the answer is "No"
            matched_text=None,
            explanation=f"Input does not match expected value. Similarity: {score:.1%} using {self.algorithm} (threshold: {self.threshold:.1%})",
        )

    def _classify_multiclass(self, input_text: str) -> ClassifierResult:
        """
        Multi-class classification: find the best matching class.

        Returns the closest matching class or "NO_MATCH" if none reaches the
        threshold; matched_text carries the winning class name on a match.
        """
        top_label: Optional[str] = None
        top_score = 0.0

        # Strict > keeps the first class achieving the maximum score and
        # leaves top_label as None when every score is 0.0.
        for label in self.classes:
            score = calculate_similarity(input_text, label, self.algorithm)
            if score > top_score:
                top_score, top_label = score, label

        if top_score >= self.threshold:
            return ClassifierResult(
                value=top_label,
                confidence=top_score,
                matched_text=top_label,  # the actual matched class name
                explanation=f"Best match: '{top_label}' with {top_score:.1%} similarity using {self.algorithm}",
            )
        return ClassifierResult(
            value="NO_MATCH",
            confidence=1.0 - top_score,  # confidence that nothing matched
            matched_text=None,
            explanation=f"No class matched above threshold using {self.algorithm}. Best was '{top_label}' at {top_score:.1%} (threshold: {self.threshold:.1%})",
        )

    def reset(self) -> None:
        """Reset classifier state (no-op for fuzzy matcher)."""

    def __repr__(self) -> str:
        if self.mode == "binary":
            return f"FuzzyMatchClassifier(expected='{self.expected}', threshold={self.threshold}, algorithm='{self.algorithm}')"
        return f"FuzzyMatchClassifier(classes={self.classes}, threshold={self.threshold}, algorithm='{self.algorithm}')"
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# Shorter alias for FuzzyMatchClassifier, kept for convenience.
FuzzyClassifier = FuzzyMatchClassifier
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLMClassifier - Classification using Language Models with retry logic.
|
|
3
|
+
|
|
4
|
+
This classifier uses an LLM (via agent_factory) to classify text, with built-in
|
|
5
|
+
retry logic that provides conversational feedback when the model returns invalid
|
|
6
|
+
classifications.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import re
|
|
11
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
from ..core.base import BaseClassifier
|
|
14
|
+
from ..core.models import ClassifierResult
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class LLMClassifier(BaseClassifier):
    """
    LLM-backed classifier with validation and conversational retries.

    The classifier asks an agent (built via ``agent_factory``) to pick one
    of the configured classes. When the model answers with something that is
    not a valid class, it is re-prompted with corrective feedback, up to
    ``max_retries`` additional attempts, so it can self-correct.

    Example:
        classifier = LLMClassifier(
            classes=["Yes", "No"],
            prompt="Did the agent greet the customer?",
            agent_factory=my_agent_factory,
        )
        result = classifier.classify("Hello, how can I help you today?")
        # result.value = "Yes"
        # result.confidence = 0.95
    """

    def __init__(
        self,
        classes: List[str],
        prompt: str,
        agent_factory: Callable[[Dict[str, Any]], Any],
        target_classes: Optional[List[str]] = None,
        max_retries: int = 3,
        temperature: float = 0.3,
        model: Optional[str] = None,
        confidence_mode: str = "heuristic",
        name: Optional[str] = None,
    ):
        """
        Initialize LLMClassifier.

        Args:
            classes: List of valid classification values
            prompt: Classification instruction/prompt
            agent_factory: Factory function to create Agent instances
            target_classes: Classes considered "positive" for precision/recall
            max_retries: Maximum retry attempts on invalid output
            temperature: LLM temperature for classification
            model: Specific model to use (optional)
            confidence_mode: "heuristic" or "none"
            name: Optional name for this classifier
        """
        self.classes = classes
        self.target_classes = target_classes or []
        self.name = name
        self.prompt = prompt
        self.agent_factory = agent_factory
        self.max_retries = max_retries
        self.temperature = temperature
        self.model = model
        self.confidence_mode = confidence_mode

        # The system prompt is fixed for this classifier's lifetime.
        self._system_prompt = self._build_system_prompt()

        # Dedicated agent instance reused across classification calls.
        self._agent = self._create_agent()

        # Usage statistics.
        self.total_calls = 0
        self.total_retries = 0

    def _build_system_prompt(self) -> str:
        """Build the classification system prompt."""
        classes_str = ", ".join(f'"{c}"' for c in self.classes)

        return f"""You are a classification assistant. Your task is to classify input according to the following instruction:

{self.prompt}

VALID CLASSIFICATIONS: {classes_str}

IMPORTANT RULES:
1. You MUST respond with EXACTLY one of the valid classifications listed above.
2. Start your response with the classification on its own line.
3. Then provide a brief explanation on the following lines.

RESPONSE FORMAT:
<classification>
<explanation>

Example:
Yes
The text clearly indicates agreement because...
"""

    def _create_agent(self) -> Any:
        """Create the internal Agent for classification."""
        if self.agent_factory is None:
            raise RuntimeError("LLMClassifier requires agent_factory")

        spec: Dict[str, Any] = {
            "system_prompt": self._system_prompt,
            "temperature": self.temperature,
        }
        # Optional stable name for mocking/traceability. When set, the DSL
        # wrapper renames the internal _temp_agent_* handle so it can be
        # mocked via: Mocks { <name> = { message = "...", tool_calls = {...} } }
        if self.name:
            spec["name"] = self.name
        if self.model:
            spec["model"] = self.model

        return self.agent_factory(spec)

    def classify(self, input_text: str) -> ClassifierResult:
        """
        Classify the input text with retry logic.

        Args:
            input_text: The text to classify

        Returns:
            ClassifierResult with value, confidence, explanation
        """
        self.total_calls += 1

        # Start each classification from a clean conversation.
        if hasattr(self._agent, "reset"):
            self._agent.reset()

        retry_count = 0
        last_response = None

        for attempt in range(self.max_retries + 1):
            if attempt:
                # Subsequent attempts re-prompt with corrective feedback.
                retry_count += 1
                self.total_retries += 1
                message = self._build_retry_feedback(last_response)
                logger.debug(f"Classification retry {retry_count}: {message[:100]}...")
            else:
                message = f"Please classify the following:\n\n{input_text}"

            try:
                reply = self._call_agent(message)
                last_response = reply.get("response") or reply.get("message") or str(reply)
            except Exception as e:
                logger.error(f"Agent call failed: {e}")
                return ClassifierResult(
                    value="ERROR",
                    error=str(e),
                    retry_count=retry_count,
                )

            parsed = self._parse_response(last_response)
            value = parsed["value"]

            # Accept only an exact member of the configured classes.
            if value in self.classes:
                return ClassifierResult(
                    value=value,
                    confidence=self._extract_confidence(last_response, value),
                    explanation=parsed["explanation"],
                    retry_count=retry_count,
                    raw_response=last_response,
                )

            logger.debug(f"Invalid classification '{value}', retrying...")

        # All retries exhausted without a valid classification.
        logger.warning(f"Classification failed after {self.max_retries} retries")
        return ClassifierResult(
            value="ERROR",
            error=f"Max retries ({self.max_retries}) exceeded. Last response: {last_response[:200] if last_response else 'None'}",
            retry_count=retry_count,
            raw_response=last_response,
        )

    def _call_agent(self, message: str) -> Dict[str, Any]:
        """Call the internal agent and normalize its reply into a dict."""
        result = self._agent({"message": message})

        # Agents may return rich objects or plain dicts/strings.
        if hasattr(result, "to_dict"):
            return result.to_dict()
        if hasattr(result, "message"):
            return {"response": result.message}
        if hasattr(result, "response"):
            return {"response": result.response}
        if isinstance(result, dict):
            return result
        return {"response": str(result)}

    def _build_retry_feedback(self, last_response: str) -> str:
        """Build feedback message for retry."""
        classes_str = ", ".join(f'"{c}"' for c in self.classes)
        return f"""Your previous response was not a valid classification.

Previous response: "{last_response[:200]}..."

VALID CLASSIFICATIONS ARE: {classes_str}

Please respond with EXACTLY one of these classifications, followed by your explanation.
Start your response with the classification on its own line."""

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Parse classification response to extract value and explanation."""
        if not response:
            return {"value": None, "explanation": None}

        lines = response.strip().split("\n")
        if not lines:
            return {"value": None, "explanation": None}

        # First line carries the classification; the rest is explanation.
        head = lines[0].strip()
        tail = "\n".join(lines[1:]).strip() if len(lines) > 1 else None

        # Strip markdown formatting, quotes, and punctuation from the
        # candidate classification line.
        cleaned = re.sub(r"[\*\"\'\`\:\.]", "", head).strip()
        cleaned_lower = cleaned.lower()

        # Pass 1: exact (case-insensitive) match.
        for cls in self.classes:
            if cleaned_lower == cls.lower():
                return {"value": cls, "explanation": tail}

        # Pass 2: prefix match (e.g. "Yes - the agent...").
        for cls in self.classes:
            if cleaned_lower.startswith(cls.lower()):
                return {"value": cls, "explanation": tail}

        # Pass 3: the classification anywhere in the first line, but only as
        # a whole token/phrase so accidental substrings (e.g. "Unknown"
        # containing "No") are not accepted.
        for cls in self.classes:
            if re.search(rf"(?i)(?<![A-Za-z0-9_]){re.escape(cls)}(?![A-Za-z0-9_])", cleaned):
                # Skip classes whose name is a substring of another class,
                # since the longer class may be what actually appeared.
                if any(
                    other != cls and cls.lower() in other.lower()
                    for other in self.classes
                ):
                    continue
                return {"value": cls, "explanation": tail}

        # Unparseable - surface the raw first line so callers can inspect it.
        return {"value": head, "explanation": tail}

    def _extract_confidence(self, response: str, classification: str) -> Optional[float]:
        """Extract confidence from response using heuristics."""
        if self.confidence_mode == "none":
            return None

        text = response.lower()

        # Buckets are ordered strongest-first; the first phrase found wins.
        buckets = (
            (
                (
                    "definitely",
                    "certainly",
                    "clearly",
                    "obviously",
                    "absolutely",
                    "100%",
                    "very confident",
                ),
                0.95,
            ),
            (("likely", "probably", "appears to be", "seems to be", "confident"), 0.80),
            (
                (
                    "possibly",
                    "might be",
                    "could be",
                    "not sure",
                    "uncertain",
                    "difficult to tell",
                ),
                0.50,
            ),
        )
        for indicators, score in buckets:
            if any(indicator in text for indicator in indicators):
                return score

        # No certainty or hedging language detected.
        return 0.75

    def reset(self) -> None:
        """Reset the classifier state (clear agent conversation)."""
        if hasattr(self._agent, "reset"):
            self._agent.reset()

    def __repr__(self) -> str:
        return f"LLMClassifier(classes={self.classes}, calls={self.total_calls}, retries={self.total_retries})"
|