tactus 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tactus/__init__.py +1 -1
- tactus/adapters/__init__.py +18 -1
- tactus/adapters/broker_log.py +127 -34
- tactus/adapters/channels/__init__.py +153 -0
- tactus/adapters/channels/base.py +174 -0
- tactus/adapters/channels/broker.py +179 -0
- tactus/adapters/channels/cli.py +448 -0
- tactus/adapters/channels/host.py +225 -0
- tactus/adapters/channels/ipc.py +297 -0
- tactus/adapters/channels/sse.py +305 -0
- tactus/adapters/cli_hitl.py +223 -1
- tactus/adapters/control_loop.py +879 -0
- tactus/adapters/file_storage.py +35 -2
- tactus/adapters/ide_log.py +7 -1
- tactus/backends/http_backend.py +0 -1
- tactus/broker/client.py +31 -1
- tactus/broker/server.py +416 -92
- tactus/cli/app.py +270 -7
- tactus/cli/control.py +393 -0
- tactus/core/config_manager.py +33 -6
- tactus/core/dsl_stubs.py +102 -18
- tactus/core/execution_context.py +265 -8
- tactus/core/lua_sandbox.py +8 -9
- tactus/core/registry.py +19 -2
- tactus/core/runtime.py +235 -27
- tactus/docker/Dockerfile.pypi +49 -0
- tactus/docs/__init__.py +33 -0
- tactus/docs/extractor.py +326 -0
- tactus/docs/html_renderer.py +72 -0
- tactus/docs/models.py +121 -0
- tactus/docs/templates/base.html +204 -0
- tactus/docs/templates/index.html +58 -0
- tactus/docs/templates/module.html +96 -0
- tactus/dspy/agent.py +382 -22
- tactus/dspy/broker_lm.py +57 -6
- tactus/dspy/config.py +14 -3
- tactus/dspy/history.py +2 -1
- tactus/dspy/module.py +136 -11
- tactus/dspy/signature.py +0 -1
- tactus/ide/server.py +300 -9
- tactus/primitives/human.py +619 -47
- tactus/primitives/system.py +0 -1
- tactus/protocols/__init__.py +25 -0
- tactus/protocols/control.py +427 -0
- tactus/protocols/notification.py +207 -0
- tactus/sandbox/container_runner.py +79 -11
- tactus/sandbox/docker_manager.py +23 -0
- tactus/sandbox/entrypoint.py +26 -0
- tactus/sandbox/protocol.py +3 -0
- tactus/stdlib/README.md +77 -0
- tactus/stdlib/__init__.py +27 -1
- tactus/stdlib/classify/__init__.py +165 -0
- tactus/stdlib/classify/classify.spec.tac +195 -0
- tactus/stdlib/classify/classify.tac +257 -0
- tactus/stdlib/classify/fuzzy.py +282 -0
- tactus/stdlib/classify/llm.py +319 -0
- tactus/stdlib/classify/primitive.py +287 -0
- tactus/stdlib/core/__init__.py +57 -0
- tactus/stdlib/core/base.py +320 -0
- tactus/stdlib/core/confidence.py +211 -0
- tactus/stdlib/core/models.py +161 -0
- tactus/stdlib/core/retry.py +171 -0
- tactus/stdlib/core/validation.py +274 -0
- tactus/stdlib/extract/__init__.py +125 -0
- tactus/stdlib/extract/llm.py +330 -0
- tactus/stdlib/extract/primitive.py +256 -0
- tactus/stdlib/tac/tactus/classify/base.tac +51 -0
- tactus/stdlib/tac/tactus/classify/fuzzy.tac +87 -0
- tactus/stdlib/tac/tactus/classify/index.md +77 -0
- tactus/stdlib/tac/tactus/classify/init.tac +29 -0
- tactus/stdlib/tac/tactus/classify/llm.tac +150 -0
- tactus/stdlib/tac/tactus/classify.spec.tac +191 -0
- tactus/stdlib/tac/tactus/extract/base.tac +138 -0
- tactus/stdlib/tac/tactus/extract/index.md +96 -0
- tactus/stdlib/tac/tactus/extract/init.tac +27 -0
- tactus/stdlib/tac/tactus/extract/llm.tac +201 -0
- tactus/stdlib/tac/tactus/extract.spec.tac +153 -0
- tactus/stdlib/tac/tactus/generate/base.tac +142 -0
- tactus/stdlib/tac/tactus/generate/index.md +195 -0
- tactus/stdlib/tac/tactus/generate/init.tac +28 -0
- tactus/stdlib/tac/tactus/generate/llm.tac +169 -0
- tactus/stdlib/tac/tactus/generate.spec.tac +210 -0
- tactus/testing/behave_integration.py +171 -7
- tactus/testing/context.py +0 -1
- tactus/testing/evaluation_runner.py +0 -1
- tactus/testing/gherkin_parser.py +0 -1
- tactus/testing/mock_hitl.py +0 -1
- tactus/testing/mock_tools.py +0 -1
- tactus/testing/models.py +0 -1
- tactus/testing/steps/builtin.py +0 -1
- tactus/testing/steps/custom.py +81 -22
- tactus/testing/steps/registry.py +0 -1
- tactus/testing/test_runner.py +7 -1
- tactus/validation/semantic_visitor.py +11 -5
- tactus/validation/validator.py +0 -1
- {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/METADATA +14 -2
- {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/RECORD +100 -49
- {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/WHEEL +0 -0
- {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/entry_points.txt +0 -0
- {tactus-0.33.0.dist-info → tactus-0.34.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLMExtractor - Information extraction using Language Models with retry logic.
|
|
3
|
+
|
|
4
|
+
This extractor uses an LLM (via agent_factory) to extract structured data from text,
|
|
5
|
+
with built-in retry logic and JSON schema validation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
import re
|
|
11
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
from ..core.base import BaseExtractor
|
|
14
|
+
from ..core.models import ExtractorResult
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class LLMExtractor(BaseExtractor):
|
|
20
|
+
"""
|
|
21
|
+
LLM-based extractor with automatic retry and validation.
|
|
22
|
+
|
|
23
|
+
Uses conversational feedback to help the LLM self-correct when it
|
|
24
|
+
returns invalid or incomplete extractions.
|
|
25
|
+
|
|
26
|
+
Example:
|
|
27
|
+
extractor = LLMExtractor(
|
|
28
|
+
fields={"name": "string", "age": "number", "email": "string"},
|
|
29
|
+
prompt="Extract customer information from this text",
|
|
30
|
+
agent_factory=my_agent_factory,
|
|
31
|
+
)
|
|
32
|
+
result = extractor.extract("John Smith is 34 years old. Contact: john@example.com")
|
|
33
|
+
# result.fields = {"name": "John Smith", "age": 34, "email": "john@example.com"}
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
def __init__(
|
|
37
|
+
self,
|
|
38
|
+
fields: Dict[str, str],
|
|
39
|
+
prompt: str,
|
|
40
|
+
agent_factory: Callable[[Dict[str, Any]], Any],
|
|
41
|
+
max_retries: int = 3,
|
|
42
|
+
temperature: float = 0.3,
|
|
43
|
+
model: Optional[str] = None,
|
|
44
|
+
strict: bool = True,
|
|
45
|
+
name: Optional[str] = None,
|
|
46
|
+
):
|
|
47
|
+
"""
|
|
48
|
+
Initialize LLMExtractor.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
fields: Dict mapping field names to their types
|
|
52
|
+
(e.g., {"name": "string", "age": "number", "items": "list"})
|
|
53
|
+
prompt: Extraction instruction/prompt
|
|
54
|
+
agent_factory: Factory function to create Agent instances
|
|
55
|
+
max_retries: Maximum retry attempts on invalid output
|
|
56
|
+
temperature: LLM temperature for extraction
|
|
57
|
+
model: Specific model to use (optional)
|
|
58
|
+
strict: If True, all fields must be extracted; if False, missing fields are OK
|
|
59
|
+
name: Optional name for this extractor
|
|
60
|
+
"""
|
|
61
|
+
self.fields = fields
|
|
62
|
+
self.name = name
|
|
63
|
+
self.prompt = prompt
|
|
64
|
+
self.agent_factory = agent_factory
|
|
65
|
+
self.max_retries = max_retries
|
|
66
|
+
self.temperature = temperature
|
|
67
|
+
self.model = model
|
|
68
|
+
self.strict = strict
|
|
69
|
+
|
|
70
|
+
# Build extraction system prompt
|
|
71
|
+
self._system_prompt = self._build_system_prompt()
|
|
72
|
+
|
|
73
|
+
# Create agent for extraction
|
|
74
|
+
self._agent = self._create_agent()
|
|
75
|
+
|
|
76
|
+
# Track statistics
|
|
77
|
+
self.total_calls = 0
|
|
78
|
+
self.total_retries = 0
|
|
79
|
+
|
|
80
|
+
def _build_system_prompt(self) -> str:
|
|
81
|
+
"""Build the extraction system prompt."""
|
|
82
|
+
fields_description = "\n".join(
|
|
83
|
+
f" - {name}: {type_}" for name, type_ in self.fields.items()
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
return f"""You are an information extraction assistant. Your task is to extract structured data according to the following instruction:
|
|
87
|
+
|
|
88
|
+
{self.prompt}
|
|
89
|
+
|
|
90
|
+
FIELDS TO EXTRACT:
|
|
91
|
+
{fields_description}
|
|
92
|
+
|
|
93
|
+
IMPORTANT RULES:
|
|
94
|
+
1. You MUST respond with a valid JSON object containing the extracted fields.
|
|
95
|
+
2. Include ONLY the specified fields in your response.
|
|
96
|
+
3. Use null for fields that cannot be extracted from the input.
|
|
97
|
+
4. For "number" fields, return numeric values (not strings).
|
|
98
|
+
5. For "list" fields, return JSON arrays.
|
|
99
|
+
6. For "boolean" fields, return true or false.
|
|
100
|
+
7. Do NOT include any explanation or text outside the JSON.
|
|
101
|
+
|
|
102
|
+
RESPONSE FORMAT:
|
|
103
|
+
{{
|
|
104
|
+
"field1": "extracted value",
|
|
105
|
+
"field2": 123,
|
|
106
|
+
...
|
|
107
|
+
}}
|
|
108
|
+
"""
|
|
109
|
+
|
|
110
|
+
def _create_agent(self) -> Any:
|
|
111
|
+
"""Create the internal Agent for extraction."""
|
|
112
|
+
if self.agent_factory is None:
|
|
113
|
+
raise RuntimeError("LLMExtractor requires agent_factory")
|
|
114
|
+
|
|
115
|
+
agent_config = {
|
|
116
|
+
"system_prompt": self._system_prompt,
|
|
117
|
+
"temperature": self.temperature,
|
|
118
|
+
}
|
|
119
|
+
if self.model:
|
|
120
|
+
agent_config["model"] = self.model
|
|
121
|
+
|
|
122
|
+
return self.agent_factory(agent_config)
|
|
123
|
+
|
|
124
|
+
def extract(self, input_text: str) -> ExtractorResult:
    """
    Extract structured data from the input text with retry logic.

    The agent is asked once and then re-asked up to ``max_retries`` times,
    each retry carrying the previous reply and its validation errors as
    feedback.

    Args:
        input_text: The text to extract from

    Returns:
        ExtractorResult with the extracted fields on success. If the agent
        call raises, the result has empty fields and ``error`` set to the
        exception text. If every attempt fails validation, the result holds
        the last parsed fields plus ``validation_errors`` and ``error``.
    """
    self.total_calls += 1

    # Reset agent conversation for fresh extraction
    if hasattr(self._agent, "reset"):
        self._agent.reset()

    retry_count = 0
    last_response = None
    # Initialised up front so the exhausted-retries result below is well
    # defined even when the loop body never runs (e.g. max_retries < 0),
    # instead of probing the local scope with dir().
    parsed: Dict[str, Any] = {}
    validation_errors: List[str] = []

    for attempt in range(self.max_retries + 1):
        # Build the message for this attempt
        if attempt == 0:
            message = f"Please extract the following information:\n\n{input_text}"
        else:
            # Retry with feedback
            retry_count += 1
            self.total_retries += 1
            message = self._build_retry_feedback(last_response, validation_errors)
            logger.debug(f"Extraction retry {retry_count}: {message[:100]}...")

        # Call the agent
        try:
            result = self._call_agent(message)
            last_response = result.get("response") or result.get("message") or str(result)
            # Parsing and retry feedback both need text. Normalise here,
            # inside the try, so an agent that returns structured data
            # cannot crash extract() with a TypeError later on.
            if isinstance(last_response, dict):
                last_response = json.dumps(last_response, default=str)
            elif not isinstance(last_response, str):
                last_response = str(last_response)
        except Exception as e:
            # Deliberate boundary: any agent failure becomes an error result.
            logger.error(f"Agent call failed: {e}")
            return ExtractorResult(
                fields={},
                error=str(e),
                retry_count=retry_count,
            )

        # Parse the response
        parsed, validation_errors = self._parse_response(last_response)

        # Check if extraction is valid
        if not validation_errors:
            return ExtractorResult(
                fields=parsed,
                retry_count=retry_count,
                raw_response=last_response,
            )

        logger.debug(f"Invalid extraction: {validation_errors}")

    # All retries exhausted
    logger.warning(f"Extraction failed after {self.max_retries} retries")
    return ExtractorResult(
        fields=parsed,
        validation_errors=validation_errors,
        error=f"Max retries ({self.max_retries}) exceeded. Validation errors: {validation_errors}",
        retry_count=retry_count,
        raw_response=last_response,
    )
|
|
189
|
+
|
|
190
|
+
def _call_agent(self, message: str) -> Dict[str, Any]:
|
|
191
|
+
"""Call the internal agent with a message."""
|
|
192
|
+
input_dict = {"message": message}
|
|
193
|
+
result = self._agent(input_dict)
|
|
194
|
+
|
|
195
|
+
# Convert result to dict
|
|
196
|
+
if hasattr(result, "to_dict"):
|
|
197
|
+
return result.to_dict()
|
|
198
|
+
if hasattr(result, "message"):
|
|
199
|
+
return {"response": result.message}
|
|
200
|
+
if hasattr(result, "response"):
|
|
201
|
+
return {"response": result.response}
|
|
202
|
+
if isinstance(result, dict):
|
|
203
|
+
return result
|
|
204
|
+
|
|
205
|
+
return {"response": str(result)}
|
|
206
|
+
|
|
207
|
+
def _build_retry_feedback(self, last_response: str, errors: List[str]) -> str:
|
|
208
|
+
"""Build feedback message for retry."""
|
|
209
|
+
errors_str = "\n".join(f" - {e}" for e in errors)
|
|
210
|
+
fields_str = ", ".join(f'"{f}"' for f in self.fields.keys())
|
|
211
|
+
|
|
212
|
+
return f"""Your previous response was not valid JSON or had validation errors.
|
|
213
|
+
|
|
214
|
+
Previous response:
|
|
215
|
+
{last_response[:500]}
|
|
216
|
+
|
|
217
|
+
Errors:
|
|
218
|
+
{errors_str}
|
|
219
|
+
|
|
220
|
+
Please respond with ONLY a valid JSON object containing these fields: {fields_str}
|
|
221
|
+
|
|
222
|
+
Do NOT include any explanation or text outside the JSON object."""
|
|
223
|
+
|
|
224
|
+
def _parse_response(self, response: str) -> tuple[Dict[str, Any], List[str]]:
|
|
225
|
+
"""
|
|
226
|
+
Parse extraction response and validate against schema.
|
|
227
|
+
|
|
228
|
+
Returns:
|
|
229
|
+
Tuple of (extracted_fields, validation_errors)
|
|
230
|
+
"""
|
|
231
|
+
if not response:
|
|
232
|
+
return {}, ["Empty response"]
|
|
233
|
+
|
|
234
|
+
# Try to extract JSON from response
|
|
235
|
+
json_match = re.search(r"\{[^{}]*\}", response, re.DOTALL)
|
|
236
|
+
if not json_match:
|
|
237
|
+
# Try to find JSON with nested braces
|
|
238
|
+
json_match = re.search(r"\{.*\}", response, re.DOTALL)
|
|
239
|
+
|
|
240
|
+
if not json_match:
|
|
241
|
+
return {}, ["No JSON object found in response"]
|
|
242
|
+
|
|
243
|
+
try:
|
|
244
|
+
parsed = json.loads(json_match.group())
|
|
245
|
+
except json.JSONDecodeError as e:
|
|
246
|
+
return {}, [f"Invalid JSON: {e}"]
|
|
247
|
+
|
|
248
|
+
# Validate fields
|
|
249
|
+
validation_errors = []
|
|
250
|
+
result = {}
|
|
251
|
+
|
|
252
|
+
for field_name, field_type in self.fields.items():
|
|
253
|
+
if field_name not in parsed:
|
|
254
|
+
if self.strict:
|
|
255
|
+
validation_errors.append(f"Missing required field: {field_name}")
|
|
256
|
+
result[field_name] = None
|
|
257
|
+
else:
|
|
258
|
+
value = parsed[field_name]
|
|
259
|
+
validated, error = self._validate_field(field_name, value, field_type)
|
|
260
|
+
if error:
|
|
261
|
+
validation_errors.append(error)
|
|
262
|
+
result[field_name] = validated
|
|
263
|
+
|
|
264
|
+
return result, validation_errors
|
|
265
|
+
|
|
266
|
+
def _validate_field(
|
|
267
|
+
self, field_name: str, value: Any, field_type: str
|
|
268
|
+
) -> tuple[Any, Optional[str]]:
|
|
269
|
+
"""
|
|
270
|
+
Validate a field value against its type.
|
|
271
|
+
|
|
272
|
+
Returns:
|
|
273
|
+
Tuple of (validated_value, error_message or None)
|
|
274
|
+
"""
|
|
275
|
+
if value is None:
|
|
276
|
+
return None, None
|
|
277
|
+
|
|
278
|
+
type_lower = field_type.lower()
|
|
279
|
+
|
|
280
|
+
if type_lower == "string":
|
|
281
|
+
return str(value), None
|
|
282
|
+
|
|
283
|
+
elif type_lower == "number":
|
|
284
|
+
if isinstance(value, (int, float)):
|
|
285
|
+
return value, None
|
|
286
|
+
try:
|
|
287
|
+
return float(value), None
|
|
288
|
+
except (ValueError, TypeError):
|
|
289
|
+
return None, f"Field '{field_name}' must be a number, got: {type(value).__name__}"
|
|
290
|
+
|
|
291
|
+
elif type_lower == "integer":
|
|
292
|
+
if isinstance(value, int) and not isinstance(value, bool):
|
|
293
|
+
return value, None
|
|
294
|
+
try:
|
|
295
|
+
return int(float(value)), None
|
|
296
|
+
except (ValueError, TypeError):
|
|
297
|
+
return None, f"Field '{field_name}' must be an integer, got: {type(value).__name__}"
|
|
298
|
+
|
|
299
|
+
elif type_lower == "boolean":
|
|
300
|
+
if isinstance(value, bool):
|
|
301
|
+
return value, None
|
|
302
|
+
if isinstance(value, str):
|
|
303
|
+
if value.lower() in ("true", "yes", "1"):
|
|
304
|
+
return True, None
|
|
305
|
+
if value.lower() in ("false", "no", "0"):
|
|
306
|
+
return False, None
|
|
307
|
+
return None, f"Field '{field_name}' must be a boolean, got: {value}"
|
|
308
|
+
|
|
309
|
+
elif type_lower in ("list", "array"):
|
|
310
|
+
if isinstance(value, list):
|
|
311
|
+
return value, None
|
|
312
|
+
return None, f"Field '{field_name}' must be a list, got: {type(value).__name__}"
|
|
313
|
+
|
|
314
|
+
elif type_lower in ("dict", "object"):
|
|
315
|
+
if isinstance(value, dict):
|
|
316
|
+
return value, None
|
|
317
|
+
return None, f"Field '{field_name}' must be an object, got: {type(value).__name__}"
|
|
318
|
+
|
|
319
|
+
else:
|
|
320
|
+
# Unknown type, accept any value
|
|
321
|
+
return value, None
|
|
322
|
+
|
|
323
|
+
def reset(self) -> None:
    """Clear the agent's conversation, if the agent exposes a ``reset`` method."""
    agent = self._agent
    if not hasattr(agent, "reset"):
        return
    agent.reset()
|
|
327
|
+
|
|
328
|
+
def __repr__(self) -> str:
|
|
329
|
+
fields_str = ", ".join(self.fields.keys())
|
|
330
|
+
return f"LLMExtractor(fields=[{fields_str}], calls={self.total_calls}, retries={self.total_retries})"
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ExtractPrimitive - Smart information extraction with built-in retry and validation.
|
|
3
|
+
|
|
4
|
+
This primitive wraps the extraction infrastructure to provide:
|
|
5
|
+
- Automatic retry with conversational feedback
|
|
6
|
+
- JSON schema validation
|
|
7
|
+
- Type coercion for extracted fields
|
|
8
|
+
- Structured result format
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from typing import Any, Dict
|
|
13
|
+
|
|
14
|
+
from ..core.base import BaseExtractor, ExtractorFactory
|
|
15
|
+
from ..core.models import ExtractorResult
|
|
16
|
+
from .llm import LLMExtractor
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
__all__ = ["ExtractPrimitive", "ExtractHandle", "ExtractorResult"]
|
|
21
|
+
|
|
22
|
+
# Register the LLM extractor as the default method
|
|
23
|
+
ExtractorFactory.register("llm", LLMExtractor)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ExtractHandle:
    """
    Callable, reusable wrapper around a BaseExtractor.

    An instance is what ``Extract { ... }`` hands back when no input is
    given; calling it runs one extraction and it can be called repeatedly.
    """

    def __init__(self, extractor: BaseExtractor, lua_table_from: Any = None):
        """
        Wrap an extractor.

        Args:
            extractor: The underlying BaseExtractor instance
            lua_table_from: Function to convert Python dicts to Lua tables
                (stored on the handle; not called by the handle itself)
        """
        self.lua_table_from = lua_table_from
        self._extractor = extractor

        # Mirror the extractor's schema on the handle.
        self.fields = extractor.fields

        # Exposed for tests; None when the extractor has no internal agent.
        self._agent = getattr(extractor, "_agent", None)

    def __call__(self, input_value: Any) -> ExtractorResult:
        """
        Run one extraction.

        Args:
            input_value: Either the text itself, or a dict whose 'text' (or,
                failing that, 'input') entry holds the text. Anything else,
                including a dict without a usable entry, is passed through
                ``str()``.

        Returns:
            ExtractorResult with fields dict and validation info
        """
        if not isinstance(input_value, dict):
            return self._extractor.extract(str(input_value))

        text = input_value.get("text") or input_value.get("input") or str(input_value)
        return self._extractor.extract(text)

    def reset(self):
        """Forward a reset to the wrapped extractor."""
        self._extractor.reset()

    @property
    def total_calls(self) -> int:
        """Calls made so far, or 0 if the extractor does not track them."""
        return getattr(self._extractor, "total_calls", 0)

    @property
    def total_retries(self) -> int:
        """Retries made so far, or 0 if the extractor does not track them."""
        return getattr(self._extractor, "total_retries", 0)

    def __repr__(self) -> str:
        return "ExtractHandle(extractor={})".format(self._extractor)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ExtractPrimitive:
|
|
94
|
+
"""
|
|
95
|
+
Smart extraction primitive with retry logic.
|
|
96
|
+
|
|
97
|
+
Follows the Agent pattern - can be configured once and called multiple times,
|
|
98
|
+
or used as a one-shot extractor.
|
|
99
|
+
|
|
100
|
+
Example usage in Lua:
|
|
101
|
+
-- One-shot extraction
|
|
102
|
+
data = Extract {
|
|
103
|
+
fields = {name = "string", age = "number", email = "string"},
|
|
104
|
+
prompt = "Extract customer information",
|
|
105
|
+
input = transcript
|
|
106
|
+
}
|
|
107
|
+
-- data.name = "John Smith"
|
|
108
|
+
-- data.age = 34
|
|
109
|
+
-- data.email = "john@example.com"
|
|
110
|
+
|
|
111
|
+
-- Reusable extractor
|
|
112
|
+
customer_extractor = Extract {
|
|
113
|
+
fields = {name = "string", age = "number"},
|
|
114
|
+
prompt = "Extract customer information"
|
|
115
|
+
}
|
|
116
|
+
data1 = customer_extractor(text1)
|
|
117
|
+
data2 = customer_extractor(text2)
|
|
118
|
+
"""
|
|
119
|
+
|
|
120
|
+
def __init__(
|
|
121
|
+
self,
|
|
122
|
+
agent_factory: Any,
|
|
123
|
+
lua_table_from: Any = None,
|
|
124
|
+
registry: Any = None,
|
|
125
|
+
mock_manager: Any = None,
|
|
126
|
+
):
|
|
127
|
+
"""
|
|
128
|
+
Initialize ExtractPrimitive.
|
|
129
|
+
|
|
130
|
+
Args:
|
|
131
|
+
agent_factory: Factory function to create Agent instances
|
|
132
|
+
lua_table_from: Function to convert Python dicts to Lua tables
|
|
133
|
+
registry: Optional registry for accessing mocks
|
|
134
|
+
mock_manager: Optional mock manager for testing
|
|
135
|
+
"""
|
|
136
|
+
self.agent_factory = agent_factory
|
|
137
|
+
self.lua_table_from = lua_table_from
|
|
138
|
+
self.registry = registry
|
|
139
|
+
self.mock_manager = mock_manager
|
|
140
|
+
|
|
141
|
+
def __call__(self, config: Dict[str, Any]) -> Any:
|
|
142
|
+
"""
|
|
143
|
+
Create an extractor from configuration.
|
|
144
|
+
|
|
145
|
+
This is called when Lua does: Extract { ... }
|
|
146
|
+
|
|
147
|
+
Args:
|
|
148
|
+
config: Extraction configuration
|
|
149
|
+
- fields: Dict mapping field names to types (required)
|
|
150
|
+
- prompt: Extraction instruction (required)
|
|
151
|
+
- input: Optional input for one-shot extraction
|
|
152
|
+
- method: Extraction method ("llm", default: "llm")
|
|
153
|
+
- max_retries: Maximum retry attempts (default: 3)
|
|
154
|
+
- temperature: Model temperature (default: 0.3)
|
|
155
|
+
- model: Model to use (optional)
|
|
156
|
+
- strict: Whether all fields are required (default: True)
|
|
157
|
+
|
|
158
|
+
Returns:
|
|
159
|
+
ExtractHandle if no input provided (reusable)
|
|
160
|
+
dict if input provided (one-shot result)
|
|
161
|
+
"""
|
|
162
|
+
# Convert Lua table to Python dict
|
|
163
|
+
config = self._lua_to_python(config)
|
|
164
|
+
|
|
165
|
+
# Validate required fields
|
|
166
|
+
fields = config.get("fields")
|
|
167
|
+
if not fields:
|
|
168
|
+
raise ValueError("Extract requires 'fields' field")
|
|
169
|
+
|
|
170
|
+
prompt = config.get("prompt")
|
|
171
|
+
if not prompt:
|
|
172
|
+
raise ValueError("Extract requires 'prompt' field")
|
|
173
|
+
|
|
174
|
+
# Create the extractor using the factory
|
|
175
|
+
extractor = self._create_extractor(config)
|
|
176
|
+
|
|
177
|
+
# Wrap in handle for Lua interop
|
|
178
|
+
handle = ExtractHandle(
|
|
179
|
+
extractor=extractor,
|
|
180
|
+
lua_table_from=self.lua_table_from,
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# If input is provided, do one-shot extraction
|
|
184
|
+
input_text = config.get("input")
|
|
185
|
+
if input_text is not None:
|
|
186
|
+
result = handle(input_text)
|
|
187
|
+
# Return extracted fields as a flat dict for Lua convenience
|
|
188
|
+
return self._to_lua_table(result.to_lua_dict())
|
|
189
|
+
|
|
190
|
+
return handle
|
|
191
|
+
|
|
192
|
+
def _create_extractor(self, config: Dict[str, Any]) -> BaseExtractor:
    """
    Instantiate the extractor selected by ``config["method"]``.

    Args:
        config: Configuration dict; "fields" and "prompt" must be present
            for the "llm" method.

    Returns:
        An LLMExtractor for method "llm" (the default), otherwise whatever
        ExtractorFactory.create() builds for the configuration.
    """
    if config.get("method", "llm") != "llm":
        # Any other method is resolved by the factory, which also receives
        # the agent factory alongside the user's settings.
        return ExtractorFactory.create({**config, "agent_factory": self.agent_factory})

    return LLMExtractor(
        fields=config["fields"],
        prompt=config["prompt"],
        agent_factory=self.agent_factory,
        max_retries=config.get("max_retries", 3),
        temperature=config.get("temperature", 0.3),
        model=config.get("model"),
        strict=config.get("strict", True),
        name=config.get("name"),
    )
|
|
219
|
+
|
|
220
|
+
def _lua_to_python(self, value: Any) -> Any:
|
|
221
|
+
"""Convert Lua table to Python dict recursively."""
|
|
222
|
+
if value is None:
|
|
223
|
+
return None
|
|
224
|
+
|
|
225
|
+
try:
|
|
226
|
+
from lupa import lua_type
|
|
227
|
+
|
|
228
|
+
if lua_type(value) == "table":
|
|
229
|
+
# Check if it's an array (1-indexed sequential keys)
|
|
230
|
+
result = {}
|
|
231
|
+
max_int_key = 0
|
|
232
|
+
has_string_keys = False
|
|
233
|
+
|
|
234
|
+
for k, v in value.items():
|
|
235
|
+
if isinstance(k, int):
|
|
236
|
+
max_int_key = max(max_int_key, k)
|
|
237
|
+
else:
|
|
238
|
+
has_string_keys = True
|
|
239
|
+
result[k] = self._lua_to_python(v)
|
|
240
|
+
|
|
241
|
+
# If all keys are sequential integers 1..n, convert to list
|
|
242
|
+
if not has_string_keys and max_int_key == len(result):
|
|
243
|
+
return [result[i] for i in range(1, max_int_key + 1)]
|
|
244
|
+
|
|
245
|
+
return result
|
|
246
|
+
return value
|
|
247
|
+
except ImportError:
|
|
248
|
+
return value
|
|
249
|
+
|
|
250
|
+
def _to_lua_table(self, value: Any) -> Any:
|
|
251
|
+
"""Convert Python value to Lua table."""
|
|
252
|
+
if self.lua_table_from is None:
|
|
253
|
+
return value
|
|
254
|
+
if isinstance(value, dict):
|
|
255
|
+
return self.lua_table_from(value)
|
|
256
|
+
return value
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
-- Base Classification Class
|
|
2
|
+
--
|
|
3
|
+
-- This module provides the foundation for all classifiers:
|
|
4
|
+
-- - class() helper for Lua OOP with inheritance
|
|
5
|
+
-- - BaseClassifier abstract base class
|
|
6
|
+
|
|
7
|
+
-- Simple class system for Lua
|
|
8
|
+
-- Create a new class table, optionally derived from `base`.
-- Every entry of `base` is copied into the new class, and the base is kept
-- in `_base`. Instances are created with `Class:new(config)`, which calls
-- `init(config)` on the new instance when the class defines it.
local function class(base)
    local cls = {}

    if base then
        for key, value in pairs(base) do
            cls[key] = value
        end
        cls._base = base
    end

    -- Instances look up missing fields on their class.
    cls.__index = cls

    cls.new = function(self, config)
        local instance = setmetatable({}, self)
        if instance.init then
            instance:init(config)
        end
        return instance
    end

    return cls
end
|
|
28
|
+
|
|
29
|
+
-- ============================================================================
|
|
30
|
+
-- BaseClassifier (Abstract Base Class)
|
|
31
|
+
-- ============================================================================
|
|
32
|
+
|
|
33
|
+
local BaseClassifier = class()

-- Store the configuration table (an empty table when none is given).
BaseClassifier.init = function(self, config)
    self.config = config or {}
end

-- Abstract: concrete classifiers must override this.
BaseClassifier.classify = function(self, text)
    error("BaseClassifier.classify() must be implemented by subclass")
end

-- Calling an instance like a function delegates to classify().
BaseClassifier.__call = function(self, text)
    return self:classify(text)
end

-- Module exports: the class helper and the abstract base class.
return {
    BaseClassifier = BaseClassifier,
    class = class,
}
|