tactus 0.31.0__py3-none-any.whl → 0.34.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. tactus/__init__.py +1 -1
  2. tactus/adapters/__init__.py +18 -1
  3. tactus/adapters/broker_log.py +127 -34
  4. tactus/adapters/channels/__init__.py +153 -0
  5. tactus/adapters/channels/base.py +174 -0
  6. tactus/adapters/channels/broker.py +179 -0
  7. tactus/adapters/channels/cli.py +448 -0
  8. tactus/adapters/channels/host.py +225 -0
  9. tactus/adapters/channels/ipc.py +297 -0
  10. tactus/adapters/channels/sse.py +305 -0
  11. tactus/adapters/cli_hitl.py +223 -1
  12. tactus/adapters/control_loop.py +879 -0
  13. tactus/adapters/file_storage.py +35 -2
  14. tactus/adapters/ide_log.py +7 -1
  15. tactus/backends/http_backend.py +0 -1
  16. tactus/broker/client.py +31 -1
  17. tactus/broker/server.py +416 -92
  18. tactus/cli/app.py +270 -7
  19. tactus/cli/control.py +393 -0
  20. tactus/core/config_manager.py +33 -6
  21. tactus/core/dsl_stubs.py +102 -18
  22. tactus/core/execution_context.py +265 -8
  23. tactus/core/lua_sandbox.py +8 -9
  24. tactus/core/registry.py +19 -2
  25. tactus/core/runtime.py +235 -27
  26. tactus/docker/Dockerfile.pypi +49 -0
  27. tactus/docs/__init__.py +33 -0
  28. tactus/docs/extractor.py +326 -0
  29. tactus/docs/html_renderer.py +72 -0
  30. tactus/docs/models.py +121 -0
  31. tactus/docs/templates/base.html +204 -0
  32. tactus/docs/templates/index.html +58 -0
  33. tactus/docs/templates/module.html +96 -0
  34. tactus/dspy/agent.py +403 -22
  35. tactus/dspy/broker_lm.py +57 -6
  36. tactus/dspy/config.py +14 -3
  37. tactus/dspy/history.py +2 -1
  38. tactus/dspy/module.py +136 -11
  39. tactus/dspy/signature.py +0 -1
  40. tactus/ide/config_server.py +536 -0
  41. tactus/ide/server.py +345 -21
  42. tactus/primitives/human.py +619 -47
  43. tactus/primitives/system.py +0 -1
  44. tactus/protocols/__init__.py +25 -0
  45. tactus/protocols/control.py +427 -0
  46. tactus/protocols/notification.py +207 -0
  47. tactus/sandbox/container_runner.py +79 -11
  48. tactus/sandbox/docker_manager.py +23 -0
  49. tactus/sandbox/entrypoint.py +26 -0
  50. tactus/sandbox/protocol.py +3 -0
  51. tactus/stdlib/README.md +77 -0
  52. tactus/stdlib/__init__.py +27 -1
  53. tactus/stdlib/classify/__init__.py +165 -0
  54. tactus/stdlib/classify/classify.spec.tac +195 -0
  55. tactus/stdlib/classify/classify.tac +257 -0
  56. tactus/stdlib/classify/fuzzy.py +282 -0
  57. tactus/stdlib/classify/llm.py +319 -0
  58. tactus/stdlib/classify/primitive.py +287 -0
  59. tactus/stdlib/core/__init__.py +57 -0
  60. tactus/stdlib/core/base.py +320 -0
  61. tactus/stdlib/core/confidence.py +211 -0
  62. tactus/stdlib/core/models.py +161 -0
  63. tactus/stdlib/core/retry.py +171 -0
  64. tactus/stdlib/core/validation.py +274 -0
  65. tactus/stdlib/extract/__init__.py +125 -0
  66. tactus/stdlib/extract/llm.py +330 -0
  67. tactus/stdlib/extract/primitive.py +256 -0
  68. tactus/stdlib/tac/tactus/classify/base.tac +51 -0
  69. tactus/stdlib/tac/tactus/classify/fuzzy.tac +87 -0
  70. tactus/stdlib/tac/tactus/classify/index.md +77 -0
  71. tactus/stdlib/tac/tactus/classify/init.tac +29 -0
  72. tactus/stdlib/tac/tactus/classify/llm.tac +150 -0
  73. tactus/stdlib/tac/tactus/classify.spec.tac +191 -0
  74. tactus/stdlib/tac/tactus/extract/base.tac +138 -0
  75. tactus/stdlib/tac/tactus/extract/index.md +96 -0
  76. tactus/stdlib/tac/tactus/extract/init.tac +27 -0
  77. tactus/stdlib/tac/tactus/extract/llm.tac +201 -0
  78. tactus/stdlib/tac/tactus/extract.spec.tac +153 -0
  79. tactus/stdlib/tac/tactus/generate/base.tac +142 -0
  80. tactus/stdlib/tac/tactus/generate/index.md +195 -0
  81. tactus/stdlib/tac/tactus/generate/init.tac +28 -0
  82. tactus/stdlib/tac/tactus/generate/llm.tac +169 -0
  83. tactus/stdlib/tac/tactus/generate.spec.tac +210 -0
  84. tactus/testing/behave_integration.py +171 -7
  85. tactus/testing/context.py +0 -1
  86. tactus/testing/evaluation_runner.py +0 -1
  87. tactus/testing/gherkin_parser.py +0 -1
  88. tactus/testing/mock_hitl.py +0 -1
  89. tactus/testing/mock_tools.py +0 -1
  90. tactus/testing/models.py +0 -1
  91. tactus/testing/steps/builtin.py +0 -1
  92. tactus/testing/steps/custom.py +81 -22
  93. tactus/testing/steps/registry.py +0 -1
  94. tactus/testing/test_runner.py +7 -1
  95. tactus/validation/semantic_visitor.py +11 -5
  96. tactus/validation/validator.py +0 -1
  97. {tactus-0.31.0.dist-info → tactus-0.34.1.dist-info}/METADATA +16 -2
  98. {tactus-0.31.0.dist-info → tactus-0.34.1.dist-info}/RECORD +101 -49
  99. {tactus-0.31.0.dist-info → tactus-0.34.1.dist-info}/WHEEL +0 -0
  100. {tactus-0.31.0.dist-info → tactus-0.34.1.dist-info}/entry_points.txt +0 -0
  101. {tactus-0.31.0.dist-info → tactus-0.34.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,287 @@
1
+ """
2
+ ClassifyPrimitive - Smart classification with built-in retry and validation.
3
+
4
+ This primitive wraps the classification infrastructure to provide:
5
+ - Automatic retry with conversational feedback
6
+ - Output validation against valid classes
7
+ - Confidence extraction from response or logprobs
8
+ - Structured result format (value, confidence, explanation)
9
+
10
+ The primitive supports multiple classification methods:
11
+ - "llm" (default): LLM-based classification with retry logic
12
+ - "fuzzy": String similarity based classification
13
+ """
14
+
15
+ import logging
16
+ from typing import Any, Dict
17
+
18
+ from ..core.base import BaseClassifier, ClassifierFactory
19
+ from ..core.models import ClassifierResult
20
+ from .llm import LLMClassifier
21
+ from .fuzzy import FuzzyMatchClassifier
22
+
23
# Public names of this module. ClassifierResult is imported from
# ..core.models and re-exported here alongside the two local classes.
__all__ = ["ClassifyPrimitive", "ClassifyHandle", "ClassifierResult"]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Register classifier methods with the factory.
# This runs at import time, so ClassifierFactory.create() can resolve a
# config whose "method" is "llm" or "fuzzy" once this module has been imported.
ClassifierFactory.register("llm", LLMClassifier)
ClassifierFactory.register("fuzzy", FuzzyMatchClassifier)
30
+
31
+
32
class ClassifyHandle:
    """
    Callable, reusable wrapper around a classifier for Lua interop.

    Classify { ... } returns one of these when no input is supplied; the
    handle can then be invoked any number of times, each call delegating
    to the wrapped classifier.
    """

    def __init__(
        self,
        classifier: BaseClassifier,
        lua_table_from: Any = None,
    ):
        """
        Build a handle around an existing classifier.

        Args:
            classifier: The BaseClassifier instance that does the actual work
            lua_table_from: Function to convert Python dicts to Lua tables
        """
        self._classifier = classifier
        self.lua_table_from = lua_table_from

        # Mirror the classifier's class configuration on the handle
        self.classes = classifier.classes
        self.target_classes = classifier.target_classes

        # Lets tests reach the underlying agent when the classifier has one
        self._agent = getattr(classifier, "_agent", None)

    def __call__(self, input_value: Any) -> ClassifierResult:
        """
        Run the wrapped classifier on the given input.

        Args:
            input_value: Input text, or a dict carrying the text under
                'text' (preferred) or 'input'

        Returns:
            Whatever the wrapped classifier's classify() returns
            (a ClassifierResult for BaseClassifier implementations)
        """
        if not isinstance(input_value, dict):
            return self._classifier.classify(str(input_value))

        # First truthy of 'text' then 'input'; otherwise the dict's string form
        text = input_value.get("text")
        if not text:
            text = input_value.get("input")
        if not text:
            text = str(input_value)
        return self._classifier.classify(text)

    def reset(self):
        """Forward a reset request to the wrapped classifier."""
        self._classifier.reset()

    @property
    def total_calls(self) -> int:
        """Call counter reported by the classifier, or 0 if it keeps none."""
        return getattr(self._classifier, "total_calls", 0)

    @property
    def total_retries(self) -> int:
        """Retry counter reported by the classifier, or 0 if it keeps none."""
        return getattr(self._classifier, "total_retries", 0)

    def __repr__(self) -> str:
        return f"ClassifyHandle(classifier={self._classifier})"
98
+
99
+
100
class ClassifyPrimitive:
    """
    Smart classification primitive with retry logic.

    Follows the Agent pattern - can be configured once and called multiple times,
    or used as a one-shot classifier.

    Example usage in Lua:
        -- One-shot classification
        result = Classify {
            classes = {"Yes", "No"},
            prompt = "Did the agent greet the customer?",
            input = transcript
        }

        -- Reusable classifier
        classifier = Classify {
            classes = {"positive", "negative", "neutral"},
            prompt = "What is the sentiment?"
        }
        result1 = classifier(text1)
        result2 = classifier(text2)

        -- With target classes for metrics
        classifier = Classify {
            classes = {"Yes", "No", "NA"},
            target_classes = {"Yes"},
            prompt = "Did the agent comply?"
        }
    """

    def __init__(
        self,
        agent_factory: Any,
        lua_table_from: Any = None,
        registry: Any = None,
        mock_manager: Any = None,
    ):
        """
        Initialize ClassifyPrimitive.

        Args:
            agent_factory: Factory function to create Agent instances
            lua_table_from: Function to convert Python dicts to Lua tables
            registry: Optional registry for accessing mocks
            mock_manager: Optional mock manager for testing
        """
        self.agent_factory = agent_factory
        self.lua_table_from = lua_table_from
        self.registry = registry
        self.mock_manager = mock_manager

    def __call__(self, config: Dict[str, Any]) -> Any:
        """
        Create a classifier from configuration.

        This is called when Lua does: Classify { ... }

        Args:
            config: Classification configuration
                - classes: List of valid classification values (required)
                - prompt: Classification prompt/instruction (required for LLM)
                - input: Optional input for one-shot classification
                - method: Classification method ("llm" or "fuzzy", default: "llm")
                - target_classes: Classes for precision/recall metrics (optional)
                - max_retries: Maximum retry attempts (default: 3)
                - temperature: Model temperature (default: 0.3)
                - model: Model to use (optional)
                - confidence_mode: "heuristic" or "none" (default: "heuristic")

        Returns:
            ClassifyHandle if no input provided (reusable)
            dict if input provided (one-shot result), converted with
            lua_table_from when that converter was supplied

        Raises:
            ValueError: If the configuration is not a table of named fields,
                or a field required by the selected method is missing
        """
        # Convert Lua table to Python dict
        config = self._lua_to_python(config)

        # An empty Lua table converts to an empty list and a missing argument
        # to None. Treat both as "no fields" so the required-field checks
        # below report the problem, rather than failing on `.get`.
        if not isinstance(config, dict):
            if config:
                raise ValueError("Classify requires a configuration table with named fields")
            config = {}

        # Debug aid only: the config may contain the full input text, so it is
        # logged at DEBUG level with lazy formatting.
        logging.getLogger(__name__).debug("[Classify] Received config: %s", config)

        method = config.get("method", "llm")

        # Validate required fields based on method
        if method == "llm":
            if not config.get("classes"):
                raise ValueError("Classify requires 'classes' field")
            if not config.get("prompt"):
                raise ValueError("Classify requires 'prompt' field")
        elif method == "fuzzy":
            # Fuzzy mode can use either 'expected' (binary) or 'classes' (multi-class)
            if not config.get("expected") and not config.get("classes"):
                raise ValueError(
                    "Classify with method='fuzzy' requires either 'expected' "
                    "(for binary Yes/No) or 'classes' (for multi-class matching)"
                )
        # Any other method is validated by the factory in _create_classifier

        # Create the classifier using the factory
        classifier = self._create_classifier(config)

        # Wrap in handle for Lua interop
        handle = ClassifyHandle(
            classifier=classifier,
            lua_table_from=self.lua_table_from,
        )

        # If input is provided, do one-shot classification
        input_text = config.get("input")
        if input_text is not None:
            result = handle(input_text)
            return self._to_lua_table(result.to_dict())

        return handle

    def _create_classifier(self, config: Dict[str, Any]) -> BaseClassifier:
        """
        Create a classifier based on configuration.

        Args:
            config: Configuration dict

        Returns:
            BaseClassifier instance

        Raises:
            ValueError: From ClassifierFactory when the method is not registered
        """
        method = config.get("method", "llm")

        if method == "llm":
            return LLMClassifier(
                classes=config["classes"],
                prompt=config["prompt"],
                agent_factory=self.agent_factory,
                target_classes=config.get("target_classes"),
                max_retries=config.get("max_retries", 3),
                temperature=config.get("temperature", 0.3),
                model=config.get("model"),
                confidence_mode=config.get("confidence_mode", "heuristic"),
                name=config.get("name"),
            )

        # Use the factory for other methods
        # Add agent_factory to config for methods that need it
        factory_config = {**config, "agent_factory": self.agent_factory}
        return ClassifierFactory.create(factory_config)

    def _lua_to_python(self, value: Any) -> Any:
        """
        Convert a Lua table to a Python dict or list, recursively.

        Values that are not Lua tables are returned unchanged, as is
        everything when lupa is not installed.
        """
        if value is None:
            return None

        try:
            from lupa import lua_type
        except ImportError:
            # Without lupa there cannot be any Lua tables to convert
            return value

        if lua_type(value) != "table":
            return value
        return self._table_to_python(value)

    def _table_to_python(self, table: Any) -> Any:
        """
        Convert one table-like object (anything exposing .items()).

        Returns a list when the keys are exactly the integers 1..n (an empty
        table yields an empty list); otherwise returns a dict keyed as in the
        table. Values are converted recursively.
        """
        converted = {key: self._lua_to_python(item) for key, item in table.items()}
        size = len(converted)

        # `size` distinct integer keys all inside 1..size are exactly 1..size.
        # Checking the range, not just the maximum, keeps sparse or 0-based
        # tables as dicts instead of failing on a missing index.
        is_sequence = all(
            isinstance(key, int) and not isinstance(key, bool) and 1 <= key <= size
            for key in converted
        )
        if is_sequence:
            return [converted[index] for index in range(1, size + 1)]
        return converted

    def _to_lua_table(self, value: Any) -> Any:
        """Convert a Python dict with lua_table_from; pass anything else through."""
        if self.lua_table_from is None:
            return value
        if isinstance(value, dict):
            return self.lua_table_from(value)
        return value
@@ -0,0 +1,57 @@
1
+ """
2
+ Tactus Standard Library - Core Module
3
+
4
+ Provides abstract base classes, common models, and shared utilities
5
+ used across all stdlib primitives.
6
+
7
+ Base Classes:
8
+ - BaseClassifier: ABC for all classification strategies
9
+ - BaseExtractor: ABC for all extraction strategies
10
+
11
+ Models:
12
+ - ClassifierResult: Result from any classifier
13
+ - ExtractorResult: Result from any extractor
14
+ - EvaluationResult: Metrics from evaluation
15
+
16
+ Utilities:
17
+ - RetryWithFeedback: Retry logic with conversational feedback
18
+ - extract_confidence: Confidence extraction heuristics
19
+ - validate_output: Output validation
20
+ """
21
+
22
+ from .base import (
23
+ BaseClassifier,
24
+ BaseExtractor,
25
+ ClassifierFactory,
26
+ ExtractorFactory,
27
+ )
28
+ from .models import (
29
+ ClassifierResult,
30
+ ExtractorResult,
31
+ EvaluationResult,
32
+ ClassifierConfig,
33
+ ExtractorConfig,
34
+ )
35
+ from .retry import RetryWithFeedback
36
+ from .confidence import extract_confidence
37
+ from .validation import validate_output
38
+
39
+ __all__ = [
40
+ # Base classes
41
+ "BaseClassifier",
42
+ "BaseExtractor",
43
+ # Factories
44
+ "ClassifierFactory",
45
+ "ExtractorFactory",
46
+ # Result models
47
+ "ClassifierResult",
48
+ "ExtractorResult",
49
+ "EvaluationResult",
50
+ # Config models
51
+ "ClassifierConfig",
52
+ "ExtractorConfig",
53
+ # Utilities
54
+ "RetryWithFeedback",
55
+ "extract_confidence",
56
+ "validate_output",
57
+ ]
@@ -0,0 +1,320 @@
1
+ """
2
+ Abstract base classes for stdlib primitives.
3
+
4
+ These ABCs define the interface that all classifiers and extractors must implement,
5
+ enabling polymorphism and consistent behavior across different implementations.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import Any, Dict, List, Optional, Type
10
+
11
+ from .models import ClassifierResult, ExtractorResult, EvaluationResult
12
+
13
+
14
class BaseClassifier(ABC):
    """
    Abstract base class for all classification strategies.

    Subclasses must implement the `classify` method. This enables:
    - LLMClassifier: Uses LLM with retry logic
    - FuzzyMatchClassifier: Uses string similarity
    - Custom classifiers: User-defined implementations

    All classifiers return ClassifierResult for consistent API.

    Example:
        class MyClassifier(BaseClassifier):
            def classify(self, input_text: str) -> ClassifierResult:
                # Custom classification logic
                return ClassifierResult(value="Yes", confidence=0.9)
    """

    # Configuration (set by subclasses). These are class-level defaults shared
    # by every subclass, so assign fresh values rather than mutating in place.
    classes: List[str] = []
    target_classes: List[str] = []
    name: Optional[str] = None

    @abstractmethod
    def classify(self, input_text: str) -> ClassifierResult:
        """
        Classify the input text and return a result.

        Args:
            input_text: The text to classify

        Returns:
            ClassifierResult with value, confidence, explanation, etc.
        """
        ...

    def __call__(self, input_value: Any) -> ClassifierResult:
        """
        Make classifiers callable.

        Handles both string input and dict input (for Lua interop).

        Args:
            input_value: Either a string or dict with 'text'/'input' key.
                For a dict, the first truthy of 'text' and 'input' is used,
                falling back to the dict's string form. Any other value is
                passed through str().

        Returns:
            ClassifierResult
        """
        if isinstance(input_value, dict):
            text = input_value.get("text") or input_value.get("input") or str(input_value)
        else:
            text = str(input_value)

        return self.classify(text)

    def reset(self) -> None:
        """
        Reset any internal state (e.g., conversation history).

        Override in subclasses that maintain state.
        """
        pass

    @classmethod
    def evaluate(
        cls,
        classifier: "BaseClassifier",
        test_data: List[Dict[str, Any]],
        label_key: str = "label",
        input_key: str = "text",
    ) -> EvaluationResult:
        """
        Evaluate a classifier on test data.

        Each item contributes exactly one (prediction, label) pair. If
        classification fails, the prediction is recorded as "ERROR" and the
        exception text is added to the result's errors.

        Args:
            classifier: The classifier instance to evaluate
            test_data: List of dicts with input text and expected labels
            label_key: Key for expected label in test data
            input_key: Key for input text in test data

        Returns:
            EvaluationResult with accuracy, precision, recall, F1.
            Precision, recall and F1 are None unless the classifier has
            target_classes.
        """
        predictions = []
        labels = []
        total_retries = 0
        confidences = []
        errors = []

        for item in test_data:
            text = item.get(input_key, "")
            expected = item.get(label_key)

            # Read everything that can fail before recording anything, so a
            # late failure cannot leave a second row behind for this item.
            try:
                result = classifier.classify(text)
                value = result.value
                retries_so_far = total_retries + result.retry_count
                confidence = result.confidence
            except Exception as e:
                errors.append(f"Error on item: {str(e)}")
                predictions.append("ERROR")
                labels.append(expected)
                continue

            predictions.append(value)
            labels.append(expected)
            total_retries = retries_so_far
            if confidence is not None:
                confidences.append(confidence)

        # Calculate accuracy
        correct = sum(pred == label for pred, label in zip(predictions, labels))
        accuracy = correct / len(test_data) if test_data else 0.0

        # Calculate confusion matrix as plain nested dicts (label -> pred -> count),
        # so reading a missing cell from the result never inserts an entry.
        confusion: Dict[Any, Dict[Any, int]] = {}
        for pred, label in zip(predictions, labels):
            row = confusion.setdefault(label, {})
            row[pred] = row.get(pred, 0) + 1

        # Calculate precision/recall for target classes
        precision = None
        recall = None
        f1 = None

        targets = classifier.target_classes
        if targets:
            # "Positive" means membership in the target set, on either side:
            #   tp: predicted target AND was target
            #   fp: predicted target BUT was NOT target
            #   fn: did NOT predict target BUT was target
            tp = fp = fn = 0
            for pred, label in zip(predictions, labels):
                predicted_target = pred in targets
                labelled_target = label in targets
                if predicted_target and labelled_target:
                    tp += 1
                elif predicted_target:
                    fp += 1
                elif labelled_target:
                    fn += 1

            precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
            recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
            f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

        return EvaluationResult(
            accuracy=accuracy,
            precision=precision,
            recall=recall,
            f1=f1,
            confusion_matrix=confusion,
            total_samples=len(test_data),
            total_retries=total_retries,
            mean_confidence=sum(confidences) / len(confidences) if confidences else None,
            errors=errors,
        )
170
+
171
+
172
class BaseExtractor(ABC):
    """
    Common interface shared by every extraction strategy.

    A concrete extractor only has to provide `extract`. Implementations
    named by this package include:
    - LLMExtractor: Uses LLM with retry logic
    - SchemaExtractor: Uses structured schema parsing
    - Custom extractors: User-defined implementations

    Every extractor hands back an ExtractorResult.

    Example:
        class MyExtractor(BaseExtractor):
            def extract(self, input_text: str) -> ExtractorResult:
                # Custom extraction logic
                return ExtractorResult(fields={"name": "John", "age": 30})
    """

    # Class-level defaults; subclasses provide their own values
    fields: Dict[str, str] = {}
    name: Optional[str] = None

    @abstractmethod
    def extract(self, input_text: str) -> ExtractorResult:
        """
        Pull structured data out of the given text.

        Args:
            input_text: The text to extract from

        Returns:
            ExtractorResult with fields dict and validation info
        """
        ...

    def __call__(self, input_value: Any) -> ExtractorResult:
        """
        Allow an extractor instance to be invoked like a function.

        Args:
            input_value: Either a string or dict with 'text'/'input' key
                (dict input exists for Lua interop)

        Returns:
            ExtractorResult
        """
        if not isinstance(input_value, dict):
            return self.extract(str(input_value))

        # 'text' wins over 'input'; with neither usable, use the dict's string form
        candidate = input_value.get("text") or input_value.get("input")
        return self.extract(candidate or str(input_value))

    def reset(self) -> None:
        """
        Clear internal state; a no-op here.

        Subclasses that keep state should override this.
        """
        return None
233
+
234
+
235
class ClassifierFactory:
    """
    Registry-backed factory that builds classifiers from a config dict.

    Custom classifier types can be added through `register`.
    """

    # method name -> classifier class
    _registry: Dict[str, Type[BaseClassifier]] = {}

    @classmethod
    def register(cls, method: str, classifier_class: Type[BaseClassifier]) -> None:
        """Associate a method name with a classifier class, replacing any earlier entry."""
        cls._registry[method] = classifier_class

    @classmethod
    def create(cls, config: Dict[str, Any], **kwargs) -> BaseClassifier:
        """
        Instantiate the classifier selected by the configuration.

        Args:
            config: Configuration dict; its 'method' key picks the class
                and defaults to "llm"
            **kwargs: Additional kwargs passed to classifier constructor

        Returns:
            BaseClassifier instance, constructed with config=config

        Raises:
            ValueError: If method is not registered
        """
        method = config.get("method", "llm")

        if method in cls._registry:
            return cls._registry[method](config=config, **kwargs)

        available = ", ".join(cls._registry)
        raise ValueError(f"Unknown classifier method: '{method}'. Available: {available}")

    @classmethod
    def available_methods(cls) -> List[str]:
        """Return the registered method names as a new list."""
        return [*cls._registry]
277
+
278
+
279
class ExtractorFactory:
    """
    Registry-backed factory that builds extractors from a config dict.

    Custom extractor types can be added through `register`.
    """

    # method name -> extractor class
    _registry: Dict[str, Type[BaseExtractor]] = {}

    @classmethod
    def register(cls, method: str, extractor_class: Type[BaseExtractor]) -> None:
        """Associate a method name with an extractor class, replacing any earlier entry."""
        cls._registry[method] = extractor_class

    @classmethod
    def create(cls, config: Dict[str, Any], **kwargs) -> BaseExtractor:
        """
        Instantiate the extractor selected by the configuration.

        Args:
            config: Configuration dict; its 'method' key picks the class
                and defaults to "llm"
            **kwargs: Additional kwargs passed to extractor constructor

        Returns:
            BaseExtractor instance, constructed with config=config

        Raises:
            ValueError: If method is not registered
        """
        method = config.get("method", "llm")

        if method in cls._registry:
            return cls._registry[method](config=config, **kwargs)

        available = ", ".join(cls._registry)
        raise ValueError(f"Unknown extractor method: '{method}'. Available: {available}")

    @classmethod
    def available_methods(cls) -> List[str]:
        """Return the registered method names as a new list."""
        return [*cls._registry]