tactus 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. tactus/__init__.py +1 -1
  2. tactus/adapters/__init__.py +18 -1
  3. tactus/adapters/broker_log.py +127 -34
  4. tactus/adapters/channels/__init__.py +153 -0
  5. tactus/adapters/channels/base.py +174 -0
  6. tactus/adapters/channels/broker.py +179 -0
  7. tactus/adapters/channels/cli.py +448 -0
  8. tactus/adapters/channels/host.py +225 -0
  9. tactus/adapters/channels/ipc.py +297 -0
  10. tactus/adapters/channels/sse.py +305 -0
  11. tactus/adapters/cli_hitl.py +223 -1
  12. tactus/adapters/control_loop.py +879 -0
  13. tactus/adapters/file_storage.py +35 -2
  14. tactus/adapters/ide_log.py +7 -1
  15. tactus/backends/http_backend.py +0 -1
  16. tactus/broker/client.py +31 -1
  17. tactus/broker/server.py +416 -92
  18. tactus/cli/app.py +270 -7
  19. tactus/cli/control.py +393 -0
  20. tactus/core/config_manager.py +33 -6
  21. tactus/core/dsl_stubs.py +102 -18
  22. tactus/core/execution_context.py +265 -8
  23. tactus/core/lua_sandbox.py +8 -9
  24. tactus/core/registry.py +19 -2
  25. tactus/core/runtime.py +235 -27
  26. tactus/docker/Dockerfile.pypi +49 -0
  27. tactus/docs/__init__.py +33 -0
  28. tactus/docs/extractor.py +326 -0
  29. tactus/docs/html_renderer.py +72 -0
  30. tactus/docs/models.py +121 -0
  31. tactus/docs/templates/base.html +204 -0
  32. tactus/docs/templates/index.html +58 -0
  33. tactus/docs/templates/module.html +96 -0
  34. tactus/dspy/agent.py +382 -22
  35. tactus/dspy/broker_lm.py +57 -6
  36. tactus/dspy/config.py +14 -3
  37. tactus/dspy/history.py +2 -1
  38. tactus/dspy/module.py +136 -11
  39. tactus/dspy/signature.py +0 -1
  40. tactus/ide/server.py +300 -9
  41. tactus/primitives/human.py +619 -47
  42. tactus/primitives/system.py +0 -1
  43. tactus/protocols/__init__.py +25 -0
  44. tactus/protocols/control.py +427 -0
  45. tactus/protocols/notification.py +207 -0
  46. tactus/sandbox/container_runner.py +79 -11
  47. tactus/sandbox/docker_manager.py +23 -0
  48. tactus/sandbox/entrypoint.py +26 -0
  49. tactus/sandbox/protocol.py +3 -0
  50. tactus/stdlib/README.md +77 -0
  51. tactus/stdlib/__init__.py +27 -1
  52. tactus/stdlib/classify/__init__.py +165 -0
  53. tactus/stdlib/classify/classify.spec.tac +195 -0
  54. tactus/stdlib/classify/classify.tac +257 -0
  55. tactus/stdlib/classify/fuzzy.py +282 -0
  56. tactus/stdlib/classify/llm.py +319 -0
  57. tactus/stdlib/classify/primitive.py +287 -0
  58. tactus/stdlib/core/__init__.py +57 -0
  59. tactus/stdlib/core/base.py +320 -0
  60. tactus/stdlib/core/confidence.py +211 -0
  61. tactus/stdlib/core/models.py +161 -0
  62. tactus/stdlib/core/retry.py +171 -0
  63. tactus/stdlib/core/validation.py +274 -0
  64. tactus/stdlib/extract/__init__.py +125 -0
  65. tactus/stdlib/extract/llm.py +330 -0
  66. tactus/stdlib/extract/primitive.py +256 -0
  67. tactus/stdlib/tac/tactus/classify/base.tac +51 -0
  68. tactus/stdlib/tac/tactus/classify/fuzzy.tac +87 -0
  69. tactus/stdlib/tac/tactus/classify/index.md +77 -0
  70. tactus/stdlib/tac/tactus/classify/init.tac +29 -0
  71. tactus/stdlib/tac/tactus/classify/llm.tac +150 -0
  72. tactus/stdlib/tac/tactus/classify.spec.tac +191 -0
  73. tactus/stdlib/tac/tactus/extract/base.tac +138 -0
  74. tactus/stdlib/tac/tactus/extract/index.md +96 -0
  75. tactus/stdlib/tac/tactus/extract/init.tac +27 -0
  76. tactus/stdlib/tac/tactus/extract/llm.tac +201 -0
  77. tactus/stdlib/tac/tactus/extract.spec.tac +153 -0
  78. tactus/stdlib/tac/tactus/generate/base.tac +142 -0
  79. tactus/stdlib/tac/tactus/generate/index.md +195 -0
  80. tactus/stdlib/tac/tactus/generate/init.tac +28 -0
  81. tactus/stdlib/tac/tactus/generate/llm.tac +169 -0
  82. tactus/stdlib/tac/tactus/generate.spec.tac +210 -0
  83. tactus/testing/behave_integration.py +171 -7
  84. tactus/testing/context.py +0 -1
  85. tactus/testing/evaluation_runner.py +0 -1
  86. tactus/testing/gherkin_parser.py +0 -1
  87. tactus/testing/mock_hitl.py +0 -1
  88. tactus/testing/mock_tools.py +0 -1
  89. tactus/testing/models.py +0 -1
  90. tactus/testing/steps/builtin.py +0 -1
  91. tactus/testing/steps/custom.py +81 -22
  92. tactus/testing/steps/registry.py +0 -1
  93. tactus/testing/test_runner.py +7 -1
  94. tactus/validation/semantic_visitor.py +11 -5
  95. tactus/validation/validator.py +0 -1
  96. {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/METADATA +14 -2
  97. {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/RECORD +100 -49
  98. {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/WHEEL +0 -0
  99. {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/entry_points.txt +0 -0
  100. {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,282 @@
1
+ """
2
+ FuzzyMatchClassifier - Classification using string similarity.
3
+
4
+ This classifier uses fuzzy string matching to classify text based on
5
+ similarity to expected values. Useful for verification tasks where
6
+ you want to check if a response matches an expected value.
7
+
8
+ Supports multiple algorithms from rapidfuzz library:
9
+ - ratio: Basic character-level similarity (default)
10
+ - token_set_ratio: Tokenize and compare unique words (handles reordering)
11
+ - token_sort_ratio: Sort tokens before comparing
12
+ - partial_ratio: Best substring match
13
+ """
14
+
15
+ import logging
16
+ from typing import Any, Dict, List, Optional
17
+
18
+ try:
19
+ from rapidfuzz import fuzz
20
+
21
+ HAS_RAPIDFUZZ = True
22
+ except ImportError:
23
+ from difflib import SequenceMatcher
24
+
25
+ HAS_RAPIDFUZZ = False
26
+
27
+ from ..core.base import BaseClassifier
28
+ from ..core.models import ClassifierResult
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
def calculate_similarity(s1: str, s2: str, algorithm: str = "ratio") -> float:
    """
    Calculate similarity between two strings using the specified algorithm.

    Both strings are lowercased and stripped of surrounding whitespace before
    they are compared. If either string is empty (before or after that
    normalization) the result is 0.0, so whitespace-only input is scored the
    same way as an empty string.

    Args:
        s1: First string
        s2: Second string
        algorithm: One of "ratio", "token_set_ratio", "token_sort_ratio", "partial_ratio"

    Returns:
        Float between 0.0 (no similarity) and 1.0 (identical)

    Raises:
        ValueError: If algorithm is not one of the supported names, or if it
            is anything other than "ratio" while rapidfuzz is not installed.

    Note:
        Uses rapidfuzz if available (faster), falls back to difflib for basic ratio.
    """
    if not s1 or not s2:
        return 0.0

    # Normalize: lowercase and strip whitespace
    s1_norm = s1.lower().strip()
    s2_norm = s2.lower().strip()

    # Whitespace-only input collapses to "" here. Without this second guard
    # two such strings would reach the scorer, and difflib reports two empty
    # sequences as a perfect 1.0 match - inconsistent with the 0.0 returned
    # above for genuinely empty strings.
    if not s1_norm or not s2_norm:
        return 0.0

    supported = ("ratio", "token_set_ratio", "token_sort_ratio", "partial_ratio")
    if algorithm not in supported:
        # Reported identically whether or not rapidfuzz is installed, so a
        # typo is never misdiagnosed as a missing dependency.
        raise ValueError(
            f"Unsupported algorithm: {algorithm}. "
            "Choose from: ratio, token_set_ratio, token_sort_ratio, partial_ratio"
        )

    if HAS_RAPIDFUZZ:
        # Use rapidfuzz (C++ backend, faster)
        scorers = {
            "ratio": fuzz.ratio,
            "token_set_ratio": fuzz.token_set_ratio,
            "token_sort_ratio": fuzz.token_sort_ratio,
            "partial_ratio": fuzz.partial_ratio,
        }
        score = scorers[algorithm](s1_norm, s2_norm)
        # Normalize from 0-100 to 0.0-1.0
        return score / 100.0

    # Fallback to difflib (only supports ratio)
    if algorithm != "ratio":
        raise ValueError(
            f"Algorithm '{algorithm}' requires rapidfuzz library. "
            "Install with: pip install rapidfuzz"
        )
    return SequenceMatcher(None, s1_norm, s2_norm).ratio()
83
+
84
+
85
class FuzzyMatchClassifier(BaseClassifier):
    """
    String similarity based classifier.

    Compares input text against expected value(s) and returns whether
    they match within a configurable threshold.

    Two modes of operation:

    1. Binary mode (single expected value):
       Returns "Yes" if similarity >= threshold, "No" otherwise.

       classifier = FuzzyMatchClassifier(
           expected="Customer Service",
           threshold=0.8,
       )
       result = classifier.classify("customer service dept")
       # result.value = "Yes", result.confidence = 0.92

    2. Multi-class mode (multiple expected values):
       Returns the closest matching class if similarity >= threshold,
       or "NO_MATCH" if nothing matches.

       classifier = FuzzyMatchClassifier(
           classes=["Technical Support", "Billing", "Sales"],
           threshold=0.7,
       )
       result = classifier.classify("tech support")
       # result.value = "Technical Support", result.confidence = 0.85

    Example usage in Lua:
        -- Binary: Does this match "Customer Service"?
        result = Classify {
            method = "fuzzy",
            expected = "Customer Service",
            threshold = 0.8,
            input = agent_response
        }

        -- Multi-class: Which department?
        result = Classify {
            method = "fuzzy",
            classes = {"Technical Support", "Billing", "Sales"},
            threshold = 0.7,
            input = department_name
        }
    """

    def __init__(
        self,
        classes: Optional[List[str]] = None,
        expected: Optional[str] = None,
        threshold: float = 0.8,
        algorithm: str = "ratio",
        target_classes: Optional[List[str]] = None,
        name: Optional[str] = None,
        # Accept but ignore these (for factory compatibility)
        config: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize FuzzyMatchClassifier.

        Args:
            classes: List of possible values to match against (multi-class mode)
            expected: Single expected value (binary mode, returns Yes/No).
                Takes precedence over ``classes`` when both are given.
            threshold: Minimum similarity score to consider a match (0.0 to 1.0)
            algorithm: Similarity algorithm - "ratio" (default), "token_set_ratio",
                "token_sort_ratio", or "partial_ratio"
            target_classes: Classes considered "positive" for precision/recall
            name: Optional name for this classifier
            config: Optional dict; any of the keys "classes", "expected",
                "threshold", "algorithm", "target_classes" and "name" found in
                it override the corresponding keyword argument.
            **kwargs: Accepted and ignored.

        Raises:
            ValueError: If neither ``expected`` nor a non-empty ``classes``
                is provided.

        Algorithm details:
            - ratio: Character-level similarity (best for exact matches)
            - token_set_ratio: Tokenizes and compares unique words (handles reordering)
            - token_sort_ratio: Sorts tokens before comparing (handles reordering)
            - partial_ratio: Best substring match (good for abbreviations)
        """
        # If config dict is provided, extract parameters from it
        if config is not None:
            classes = config.get("classes", classes)
            expected = config.get("expected", expected)
            threshold = config.get("threshold", threshold)
            algorithm = config.get("algorithm", algorithm)
            target_classes = config.get("target_classes", target_classes)
            name = config.get("name", name)

        self.threshold = threshold
        self.algorithm = algorithm
        self.name = name

        # Determine mode: binary or multi-class
        if expected is not None:
            # Binary mode: Yes/No based on match to expected
            self.mode = "binary"
            self.expected = expected
            self.classes = ["Yes", "No"]
            self.target_classes = target_classes or ["Yes"]
        elif classes is not None and len(classes) > 0:
            # Multi-class mode: return closest matching class
            self.mode = "multiclass"
            self.expected = None
            self.classes = list(classes)
            self.target_classes = target_classes or []
        else:
            raise ValueError(
                "FuzzyMatchClassifier requires either 'expected' (binary mode) "
                "or 'classes' (multi-class mode)"
            )

        # Track statistics
        self.total_calls = 0

    def classify(self, input_text: str) -> ClassifierResult:
        """
        Classify the input text using fuzzy string matching.

        Args:
            input_text: The text to classify

        Returns:
            ClassifierResult with value, confidence (similarity score), explanation
        """
        self.total_calls += 1

        if self.mode == "binary":
            return self._classify_binary(input_text)
        return self._classify_multiclass(input_text)

    def _classify_binary(self, input_text: str) -> ClassifierResult:
        """
        Binary classification: Does input match expected value?

        Returns "Yes" or "No". For "Yes" the confidence is the similarity
        score; for "No" it is ``1.0 - similarity``. matched_text is the
        expected value on a match and None otherwise.
        """
        similarity = calculate_similarity(input_text, self.expected, self.algorithm)

        if similarity >= self.threshold:
            return ClassifierResult(
                value="Yes",
                confidence=similarity,
                matched_text=self.expected,  # Return what it matched against
                explanation=f"Input matches expected value with {similarity:.1%} similarity using {self.algorithm} (threshold: {self.threshold:.1%})",
            )

        return ClassifierResult(
            value="No",
            confidence=1.0 - similarity,  # Confidence in "No"
            matched_text=None,  # No match found
            explanation=f"Input does not match expected value. Similarity: {similarity:.1%} using {self.algorithm} (threshold: {self.threshold:.1%})",
        )

    def _classify_multiclass(self, input_text: str) -> ClassifierResult:
        """
        Multi-class classification: Find best matching class.

        Returns the closest matching class or "NO_MATCH" if none meet threshold.
        matched_text contains the actual matched string from the classes list.
        A class is only ever returned if it scored a similarity above zero.
        """
        best_match = None
        best_similarity = 0.0

        for cls in self.classes:
            similarity = calculate_similarity(input_text, cls, self.algorithm)
            # Strict ">" keeps the earliest class on ties.
            if similarity > best_similarity:
                best_similarity = similarity
                best_match = cls

        if best_match is None:
            # Every class scored 0.0 (for example, empty input). Without this
            # guard a threshold of 0.0 would yield value=None, and the
            # NO_MATCH explanation would read "Best was 'None'".
            return ClassifierResult(
                value="NO_MATCH",
                confidence=1.0,  # 1.0 - best_similarity, with best_similarity == 0.0
                matched_text=None,  # No match found
                explanation=f"No class had any similarity to the input using {self.algorithm} (threshold: {self.threshold:.1%})",
            )

        if best_similarity >= self.threshold:
            return ClassifierResult(
                value=best_match,
                confidence=best_similarity,
                matched_text=best_match,  # The actual matched class name
                explanation=f"Best match: '{best_match}' with {best_similarity:.1%} similarity using {self.algorithm}",
            )

        return ClassifierResult(
            value="NO_MATCH",
            confidence=1.0 - best_similarity,  # Confidence in no match
            matched_text=None,  # No match found
            explanation=f"No class matched above threshold using {self.algorithm}. Best was '{best_match}' at {best_similarity:.1%} (threshold: {self.threshold:.1%})",
        )

    def reset(self) -> None:
        """Reset classifier state (no-op for fuzzy matcher)."""
        pass

    def __repr__(self) -> str:
        if self.mode == "binary":
            return f"FuzzyMatchClassifier(expected='{self.expected}', threshold={self.threshold}, algorithm='{self.algorithm}')"
        return f"FuzzyMatchClassifier(classes={self.classes}, threshold={self.threshold}, algorithm='{self.algorithm}')"
279
+
280
+
281
+ # Shorter alias: FuzzyClassifier is bound to the same class object as FuzzyMatchClassifier
282
+ FuzzyClassifier = FuzzyMatchClassifier
@@ -0,0 +1,319 @@
1
+ """
2
+ LLMClassifier - Classification using Language Models with retry logic.
3
+
4
+ This classifier uses an LLM (via agent_factory) to classify text, with built-in
5
+ retry logic that provides conversational feedback when the model returns invalid
6
+ classifications.
7
+ """
8
+
9
+ import logging
10
+ import re
11
+ from typing import Any, Callable, Dict, List, Optional
12
+
13
+ from ..core.base import BaseClassifier
14
+ from ..core.models import ClassifierResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class LLMClassifier(BaseClassifier):
    """
    LLM-based classifier with automatic retry and validation.

    Uses conversational feedback to help the LLM self-correct when it
    returns invalid classifications.

    Example:
        classifier = LLMClassifier(
            classes=["Yes", "No"],
            prompt="Did the agent greet the customer?",
            agent_factory=my_agent_factory,
        )
        result = classifier.classify("Hello, how can I help you today?")
        # result.value = "Yes"
        # result.confidence = 0.95
    """

    def __init__(
        self,
        classes: List[str],
        prompt: str,
        agent_factory: Callable[[Dict[str, Any]], Any],
        target_classes: Optional[List[str]] = None,
        max_retries: int = 3,
        temperature: float = 0.3,
        model: Optional[str] = None,
        confidence_mode: str = "heuristic",
        name: Optional[str] = None,
    ):
        """
        Initialize LLMClassifier.

        The system prompt is built and the internal agent is created
        immediately, so ``agent_factory`` is invoked during construction.

        Args:
            classes: List of valid classification values
            prompt: Classification instruction/prompt
            agent_factory: Factory function to create Agent instances
            target_classes: Classes considered "positive" for precision/recall
            max_retries: Maximum retry attempts on invalid output
            temperature: LLM temperature for classification
            model: Specific model to use (optional)
            confidence_mode: "heuristic" or "none"
            name: Optional name for this classifier

        Raises:
            RuntimeError: If agent_factory is None.
        """
        self.classes = classes
        self.target_classes = target_classes or []
        self.name = name
        self.prompt = prompt
        self.agent_factory = agent_factory
        self.max_retries = max_retries
        self.temperature = temperature
        self.model = model
        self.confidence_mode = confidence_mode

        # Build classification system prompt
        self._system_prompt = self._build_system_prompt()

        # Create agent for classification
        self._agent = self._create_agent()

        # Track statistics
        self.total_calls = 0
        self.total_retries = 0

    def _build_system_prompt(self) -> str:
        """Build the classification system prompt."""
        classes_str = ", ".join(f'"{c}"' for c in self.classes)

        return f"""You are a classification assistant. Your task is to classify input according to the following instruction:

{self.prompt}

VALID CLASSIFICATIONS: {classes_str}

IMPORTANT RULES:
1. You MUST respond with EXACTLY one of the valid classifications listed above.
2. Start your response with the classification on its own line.
3. Then provide a brief explanation on the following lines.

RESPONSE FORMAT:
<classification>
<explanation>

Example:
Yes
The text clearly indicates agreement because...
"""

    def _create_agent(self) -> Any:
        """
        Create the internal Agent for classification.

        Raises:
            RuntimeError: If no agent_factory was supplied.
        """
        if self.agent_factory is None:
            raise RuntimeError("LLMClassifier requires agent_factory")

        agent_config = {
            "system_prompt": self._system_prompt,
            "temperature": self.temperature,
        }
        # Optional stable name for mocking/traceability. When set, the DSL wrapper
        # renames the internal _temp_agent_* handle so it can be mocked via:
        #   Mocks { <name> = { message = "...", tool_calls = {...} } }
        if self.name:
            agent_config["name"] = self.name
        if self.model:
            agent_config["model"] = self.model

        return self.agent_factory(agent_config)

    def classify(self, input_text: str) -> ClassifierResult:
        """
        Classify the input text with retry logic.

        The agent is asked once, then re-asked with corrective feedback up to
        ``max_retries`` more times while its answer is not one of ``classes``.

        Args:
            input_text: The text to classify

        Returns:
            ClassifierResult with value, confidence, explanation. The value is
            "ERROR" (with ``error`` set) if the agent call raises or if every
            attempt produced an invalid classification.
        """
        self.total_calls += 1

        # Reset agent conversation for fresh classification
        if hasattr(self._agent, "reset"):
            self._agent.reset()

        retry_count = 0
        last_response = None

        for attempt in range(self.max_retries + 1):
            # Build the message for this attempt
            if attempt == 0:
                message = f"Please classify the following:\n\n{input_text}"
            else:
                # Retry with feedback
                retry_count += 1
                self.total_retries += 1
                message = self._build_retry_feedback(last_response)
                logger.debug(f"Classification retry {retry_count}: {message[:100]}...")

            # Call the agent
            try:
                result = self._call_agent(message)
                last_response = result.get("response") or result.get("message") or str(result)
            except Exception as e:
                # Any agent failure is reported as an ERROR result rather
                # than propagated to the caller.
                logger.error(f"Agent call failed: {e}")
                return ClassifierResult(
                    value="ERROR",
                    error=str(e),
                    retry_count=retry_count,
                )

            # Parse the response
            parsed = self._parse_response(last_response)

            # Check if classification is valid
            if parsed["value"] in self.classes:
                confidence = self._extract_confidence(last_response, parsed["value"])
                return ClassifierResult(
                    value=parsed["value"],
                    confidence=confidence,
                    explanation=parsed["explanation"],
                    retry_count=retry_count,
                    raw_response=last_response,
                )

            logger.debug(f"Invalid classification '{parsed['value']}', retrying...")

        # All retries exhausted
        logger.warning(f"Classification failed after {self.max_retries} retries")
        return ClassifierResult(
            value="ERROR",
            error=f"Max retries ({self.max_retries}) exceeded. Last response: {last_response[:200] if last_response else 'None'}",
            retry_count=retry_count,
            raw_response=last_response,
        )

    def _call_agent(self, message: str) -> Dict[str, Any]:
        """
        Call the internal agent with a message and return its result as a dict.

        The result is converted by the first applicable rule: ``to_dict()``,
        a ``message`` attribute, a ``response`` attribute, an existing dict,
        and finally ``str(result)``.
        """
        input_dict = {"message": message}
        result = self._agent(input_dict)

        # Convert result to dict
        if hasattr(result, "to_dict"):
            return result.to_dict()
        if hasattr(result, "message"):
            return {"response": result.message}
        if hasattr(result, "response"):
            return {"response": result.response}
        if isinstance(result, dict):
            return result

        return {"response": str(result)}

    def _build_retry_feedback(self, last_response: str) -> str:
        """Build feedback message for retry, quoting the first 200 chars of the last response."""
        classes_str = ", ".join(f'"{c}"' for c in self.classes)
        return f"""Your previous response was not a valid classification.

Previous response: "{last_response[:200]}..."

VALID CLASSIFICATIONS ARE: {classes_str}

Please respond with EXACTLY one of these classifications, followed by your explanation.
Start your response with the classification on its own line."""

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """
        Parse classification response to extract value and explanation.

        Only the first line of the stripped response is inspected for the
        classification; the remaining lines become the explanation. Matching
        is case-insensitive and tried in this order:

        1. the cleaned first line equals a class name,
        2. the cleaned first line starts with a class name that ends on a
           token boundary (e.g. "Yes - the agent..."),
        3. a class name appears as a whole token anywhere in the first line.

        Returns:
            Dict with "value" and "explanation". "value" is the matching
            class name (in its original casing), the raw first line if
            nothing matched, or None for an empty response.
        """
        if not response:
            return {"value": None, "explanation": None}

        lines = response.strip().split("\n")

        # First non-empty line should be the classification
        first_line = lines[0].strip()
        explanation = "\n".join(lines[1:]).strip() if len(lines) > 1 else None

        # Clean up common variations
        # Remove markdown formatting, quotes, punctuation
        cleaned = re.sub(r"[\*\"\'\`\:\.]", "", first_line).strip()
        cleaned_lower = cleaned.lower()

        # Check for exact match first
        for cls in self.classes:
            if cleaned_lower == cls.lower():
                return {"value": cls, "explanation": explanation}

        # Check for prefix match (e.g., "Yes - the agent...").
        # The character after the prefix must not continue the word, so
        # "Not sure" or "Nothing" is not read as "No" and "Yesterday" is not
        # read as "Yes". Longer names go first so the most specific class
        # wins when one class name is a prefix of another.
        for cls in sorted(self.classes, key=len, reverse=True):
            cls_lower = cls.lower()
            if not cleaned_lower.startswith(cls_lower):
                continue
            remainder = cleaned_lower[len(cls_lower):]
            if re.match(r"[A-Za-z0-9_]", remainder) is None:
                return {"value": cls, "explanation": explanation}

        # Check for the classification anywhere in first line
        for cls in self.classes:
            # Only match whole tokens/phrases so we don't accept accidental
            # substrings (e.g., "Unknown" containing "No").
            if re.search(rf"(?i)(?<![A-Za-z0-9_]){re.escape(cls)}(?![A-Za-z0-9_])", cleaned):
                # Make sure it's not a partial match of another class
                is_partial = any(
                    other_cls != cls and cls.lower() in other_cls.lower()
                    for other_cls in self.classes
                )
                if not is_partial:
                    return {"value": cls, "explanation": explanation}

        # Could not parse - return raw first line as value
        return {"value": first_line, "explanation": explanation}

    def _extract_confidence(self, response: str, classification: str) -> Optional[float]:
        """
        Extract confidence from response using heuristics.

        The lowercased response is scanned for indicator phrases, tier by
        tier; the first tier with a hit decides the score. An indicator only
        counts when it starts at a word boundary, so "unlikely" does not
        trigger "likely" and "uncertainly" does not trigger "certainly".

        Args:
            response: Full raw response text from the agent
            classification: The accepted classification (currently unused)

        Returns:
            None when confidence_mode is "none"; otherwise 0.95, 0.80 or 0.50
            for a high / medium-high / low indicator, or 0.75 when no
            indicator is found.
        """
        if self.confidence_mode == "none":
            return None

        # Heuristic: Look for confidence indicators in the response
        response_lower = response.lower()

        tiers = (
            # High confidence indicators
            (
                0.95,
                (
                    "definitely",
                    "certainly",
                    "clearly",
                    "obviously",
                    "absolutely",
                    "100%",
                    "very confident",
                ),
            ),
            # Medium-high confidence
            (
                0.80,
                ("likely", "probably", "appears to be", "seems to be", "confident"),
            ),
            # Low confidence indicators
            (
                0.50,
                (
                    "possibly",
                    "might be",
                    "could be",
                    "not sure",
                    "uncertain",
                    "difficult to tell",
                ),
            ),
        )
        for score, indicators in tiers:
            for indicator in indicators:
                # Leading boundary only: suffixed forms such as "confidently"
                # still count, prefixed forms such as "unlikely" do not.
                if re.search(rf"(?<![a-z0-9_]){re.escape(indicator)}", response_lower):
                    return score

        # Default confidence when no indicators found
        return 0.75

    def reset(self) -> None:
        """Reset the classifier state (clear agent conversation)."""
        if hasattr(self._agent, "reset"):
            self._agent.reset()

    def __repr__(self) -> str:
        return f"LLMClassifier(classes={self.classes}, calls={self.total_calls}, retries={self.total_retries})"