tactus 0.31.0__py3-none-any.whl → 0.34.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tactus/__init__.py +1 -1
- tactus/adapters/__init__.py +18 -1
- tactus/adapters/broker_log.py +127 -34
- tactus/adapters/channels/__init__.py +153 -0
- tactus/adapters/channels/base.py +174 -0
- tactus/adapters/channels/broker.py +179 -0
- tactus/adapters/channels/cli.py +448 -0
- tactus/adapters/channels/host.py +225 -0
- tactus/adapters/channels/ipc.py +297 -0
- tactus/adapters/channels/sse.py +305 -0
- tactus/adapters/cli_hitl.py +223 -1
- tactus/adapters/control_loop.py +879 -0
- tactus/adapters/file_storage.py +35 -2
- tactus/adapters/ide_log.py +7 -1
- tactus/backends/http_backend.py +0 -1
- tactus/broker/client.py +31 -1
- tactus/broker/server.py +416 -92
- tactus/cli/app.py +270 -7
- tactus/cli/control.py +393 -0
- tactus/core/config_manager.py +33 -6
- tactus/core/dsl_stubs.py +102 -18
- tactus/core/execution_context.py +265 -8
- tactus/core/lua_sandbox.py +8 -9
- tactus/core/registry.py +19 -2
- tactus/core/runtime.py +235 -27
- tactus/docker/Dockerfile.pypi +49 -0
- tactus/docs/__init__.py +33 -0
- tactus/docs/extractor.py +326 -0
- tactus/docs/html_renderer.py +72 -0
- tactus/docs/models.py +121 -0
- tactus/docs/templates/base.html +204 -0
- tactus/docs/templates/index.html +58 -0
- tactus/docs/templates/module.html +96 -0
- tactus/dspy/agent.py +403 -22
- tactus/dspy/broker_lm.py +57 -6
- tactus/dspy/config.py +14 -3
- tactus/dspy/history.py +2 -1
- tactus/dspy/module.py +136 -11
- tactus/dspy/signature.py +0 -1
- tactus/ide/config_server.py +536 -0
- tactus/ide/server.py +345 -21
- tactus/primitives/human.py +619 -47
- tactus/primitives/system.py +0 -1
- tactus/protocols/__init__.py +25 -0
- tactus/protocols/control.py +427 -0
- tactus/protocols/notification.py +207 -0
- tactus/sandbox/container_runner.py +79 -11
- tactus/sandbox/docker_manager.py +23 -0
- tactus/sandbox/entrypoint.py +26 -0
- tactus/sandbox/protocol.py +3 -0
- tactus/stdlib/README.md +77 -0
- tactus/stdlib/__init__.py +27 -1
- tactus/stdlib/classify/__init__.py +165 -0
- tactus/stdlib/classify/classify.spec.tac +195 -0
- tactus/stdlib/classify/classify.tac +257 -0
- tactus/stdlib/classify/fuzzy.py +282 -0
- tactus/stdlib/classify/llm.py +319 -0
- tactus/stdlib/classify/primitive.py +287 -0
- tactus/stdlib/core/__init__.py +57 -0
- tactus/stdlib/core/base.py +320 -0
- tactus/stdlib/core/confidence.py +211 -0
- tactus/stdlib/core/models.py +161 -0
- tactus/stdlib/core/retry.py +171 -0
- tactus/stdlib/core/validation.py +274 -0
- tactus/stdlib/extract/__init__.py +125 -0
- tactus/stdlib/extract/llm.py +330 -0
- tactus/stdlib/extract/primitive.py +256 -0
- tactus/stdlib/tac/tactus/classify/base.tac +51 -0
- tactus/stdlib/tac/tactus/classify/fuzzy.tac +87 -0
- tactus/stdlib/tac/tactus/classify/index.md +77 -0
- tactus/stdlib/tac/tactus/classify/init.tac +29 -0
- tactus/stdlib/tac/tactus/classify/llm.tac +150 -0
- tactus/stdlib/tac/tactus/classify.spec.tac +191 -0
- tactus/stdlib/tac/tactus/extract/base.tac +138 -0
- tactus/stdlib/tac/tactus/extract/index.md +96 -0
- tactus/stdlib/tac/tactus/extract/init.tac +27 -0
- tactus/stdlib/tac/tactus/extract/llm.tac +201 -0
- tactus/stdlib/tac/tactus/extract.spec.tac +153 -0
- tactus/stdlib/tac/tactus/generate/base.tac +142 -0
- tactus/stdlib/tac/tactus/generate/index.md +195 -0
- tactus/stdlib/tac/tactus/generate/init.tac +28 -0
- tactus/stdlib/tac/tactus/generate/llm.tac +169 -0
- tactus/stdlib/tac/tactus/generate.spec.tac +210 -0
- tactus/testing/behave_integration.py +171 -7
- tactus/testing/context.py +0 -1
- tactus/testing/evaluation_runner.py +0 -1
- tactus/testing/gherkin_parser.py +0 -1
- tactus/testing/mock_hitl.py +0 -1
- tactus/testing/mock_tools.py +0 -1
- tactus/testing/models.py +0 -1
- tactus/testing/steps/builtin.py +0 -1
- tactus/testing/steps/custom.py +81 -22
- tactus/testing/steps/registry.py +0 -1
- tactus/testing/test_runner.py +7 -1
- tactus/validation/semantic_visitor.py +11 -5
- tactus/validation/validator.py +0 -1
- {tactus-0.31.0.dist-info → tactus-0.34.1.dist-info}/METADATA +16 -2
- {tactus-0.31.0.dist-info → tactus-0.34.1.dist-info}/RECORD +101 -49
- {tactus-0.31.0.dist-info → tactus-0.34.1.dist-info}/WHEEL +0 -0
- {tactus-0.31.0.dist-info → tactus-0.34.1.dist-info}/entry_points.txt +0 -0
- {tactus-0.31.0.dist-info → tactus-0.34.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Extraction Module
|
|
2
|
+
|
|
3
|
+
The `tactus.extract` module provides structured data extraction from unstructured text using LLM-based analysis.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
All extractors extend `BaseExtractor` and share a common interface. This enables consistent usage patterns and makes it easy to add new extraction strategies in the future.
|
|
8
|
+
|
|
9
|
+
## When to Use
|
|
10
|
+
|
|
11
|
+
- **LLMExtractor**: Use when you need to extract structured fields from natural language text. Ideal for forms, documents, conversations, and any unstructured data where field values aren't in a predictable format.
|
|
12
|
+
|
|
13
|
+
## Architecture
|
|
14
|
+
|
|
15
|
+
The module uses a proper Lua class hierarchy:
|
|
16
|
+
|
|
17
|
+
- `BaseExtractor` - Abstract base with common interface and field validation
|
|
18
|
+
- `LLMExtractor` - LLM-powered extraction with automatic retry logic
|
|
19
|
+
|
|
20
|
+
All extractors return a consistent result format:
|
|
21
|
+
|
|
22
|
+
```lua
|
|
23
|
+
{
|
|
24
|
+
fields = { -- Extracted field values
|
|
25
|
+
name = "John Smith",
|
|
26
|
+
age = 34,
|
|
27
|
+
email = "john@example.com"
|
|
28
|
+
},
|
|
29
|
+
retry_count = 0, -- Number of retries needed
|
|
30
|
+
raw_response = "...", -- LLM response (LLM only)
|
|
31
|
+
error = nil, -- Error message if failed
|
|
32
|
+
validation_errors = {} -- List of validation errors
|
|
33
|
+
}
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Loading the Module
|
|
37
|
+
|
|
38
|
+
```lua
|
|
39
|
+
-- Load the main module
|
|
40
|
+
local extract = require("tactus.extract")
|
|
41
|
+
|
|
42
|
+
-- Or load specific extractors (dependencies auto-load)
|
|
43
|
+
local LLMExtractor = require("tactus.extract.llm").LLMExtractor
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Field Types
|
|
47
|
+
|
|
48
|
+
LLMExtractor supports these field types for validation:
|
|
49
|
+
|
|
50
|
+
| Type | Description | Example |
|
|
51
|
+
|------|-------------|---------|
|
|
52
|
+
| `string` | Text values | `"John Smith"` |
|
|
53
|
+
| `number` | Numeric values (float) | `34.5` |
|
|
54
|
+
| `integer` | Whole numbers | `34` |
|
|
55
|
+
| `boolean` | True/false values | `true` |
|
|
56
|
+
| `list`/`array` | JSON arrays | `["a", "b", "c"]` |
|
|
57
|
+
| `object`/`dict` | JSON objects | `{"key": "value"}` |
|
|
58
|
+
|
|
59
|
+
## Performance Notes
|
|
60
|
+
|
|
61
|
+
- LLM extraction typically takes 1-3 seconds per call
|
|
62
|
+
- Retry logic adds latency for malformed responses
|
|
63
|
+
- Consider caching extraction results for repeated operations
|
|
64
|
+
- Use the `strict` parameter to control validation behavior
|
|
65
|
+
|
|
66
|
+
## Extending Extractors
|
|
67
|
+
|
|
68
|
+
You can extend `BaseExtractor` to create custom extractors:
|
|
69
|
+
|
|
70
|
+
```lua
|
|
71
|
+
local base = require("tactus.extract.base")
|
|
72
|
+
local class = base.class
|
|
73
|
+
local BaseExtractor = base.BaseExtractor
|
|
74
|
+
|
|
75
|
+
MyExtractor = class(BaseExtractor)
|
|
76
|
+
|
|
77
|
+
function MyExtractor:init(config)
|
|
78
|
+
BaseExtractor.init(self, config)
|
|
79
|
+
-- Your initialization
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
function MyExtractor:extract(text)
|
|
83
|
+
-- Your extraction logic
|
|
84
|
+
local fields = {}
|
|
85
|
+
-- ... populate fields ...
|
|
86
|
+
|
|
87
|
+
-- Validate against schema
|
|
88
|
+
local validated, errors = self:validate_fields(fields, self.fields)
|
|
89
|
+
|
|
90
|
+
return {
|
|
91
|
+
fields = validated,
|
|
92
|
+
validation_errors = errors,
|
|
93
|
+
retry_count = 0
|
|
94
|
+
}
|
|
95
|
+
end
|
|
96
|
+
```
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
-- Tactus Extraction Module
|
|
2
|
+
--
|
|
3
|
+
-- Provides structured data extraction from text with:
|
|
4
|
+
-- - LLM-based extraction (tactus.extract.llm)
|
|
5
|
+
-- - Field validation and type coercion
|
|
6
|
+
-- - Extensible base class (tactus.extract.base)
|
|
7
|
+
--
|
|
8
|
+
-- Usage:
|
|
9
|
+
-- local extract = require("tactus.extract")
|
|
10
|
+
-- local extractor = extract.LLMExtractor:new{...}
|
|
11
|
+
--
|
|
12
|
+
-- Or load specific extractors:
|
|
13
|
+
-- local LLMExtractor = require("tactus.extract.llm").LLMExtractor
|
|
14
|
+
|
|
15
|
+
-- Load all submodules
|
|
16
|
+
local base = require("tactus.extract.base")
|
|
17
|
+
local llm = require("tactus.extract.llm")
|
|
18
|
+
|
|
19
|
+
-- Re-export all classes
|
|
20
|
+
-- Public surface of tactus.extract: gather the classes exposed by the
-- submodules into one table so callers need only a single require.
local exports = {}

-- Core classes
exports.BaseExtractor = base.BaseExtractor
exports.LLMExtractor = llm.LLMExtractor

-- Helper for users who want to build their own extractor classes
exports.class = base.class

return exports
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
-- LLM-Based Extraction
|
|
2
|
+
--
|
|
3
|
+
-- Provides LLM-powered structured data extraction with:
|
|
4
|
+
-- - Retry logic for invalid responses
|
|
5
|
+
-- - JSON parsing and validation
|
|
6
|
+
-- - Field type validation
|
|
7
|
+
-- - Conversational feedback for self-correction
|
|
8
|
+
|
|
9
|
+
-- Load dependencies
|
|
10
|
+
local base = require("tactus.extract.base")
|
|
11
|
+
local BaseExtractor = base.BaseExtractor
|
|
12
|
+
local class = base.class
|
|
13
|
+
local json = require("tactus.io.json")
|
|
14
|
+
|
|
15
|
+
-- ============================================================================
|
|
16
|
+
-- LLMExtractor
|
|
17
|
+
-- ============================================================================
|
|
18
|
+
|
|
19
|
+
-- Create the LLMExtractor class with BaseExtractor as its base, using the
-- class helper exported by tactus.extract.base.
local LLMExtractor = class(BaseExtractor)
|
|
20
|
+
|
|
21
|
+
--- Initialise an LLM-backed extractor and create its agent.
--
-- Keys read from `config`:
--   fields      (required) table mapping field name -> type name
--   prompt      (required) extraction instruction embedded in the system prompt
--   max_retries extra attempts after the first one (default 3)
--   temperature sampling temperature handed to the agent (default 0.3)
--   model       optional model identifier, "provider/model_id" or a bare id
--   strict      stored as true unless explicitly set to false
--
-- Raises (via assert) when `fields` or `prompt` is missing.
function LLMExtractor:init(config)
    BaseExtractor.init(self, config)

    -- Validate required fields
    assert(config.fields, "LLMExtractor requires 'fields' field")
    assert(config.prompt, "LLMExtractor requires 'prompt' field")

    self.fields = config.fields
    self.prompt = config.prompt
    self.max_retries = config.max_retries or 3
    self.temperature = config.temperature or 0.3
    self.model = config.model
    self.strict = config.strict ~= false -- Default to strict mode

    -- The system prompt depends on self.prompt and self.fields, so build it
    -- only after both have been stored.
    self.system_prompt = self:build_system_prompt()

    -- Create agent
    local agent_config = {
        system_prompt = self.system_prompt,
        temperature = self.temperature,
    }

    if self.model then
        local provider, model_id = self.model:match("([^/]+)/(.+)")
        if provider and model_id then
            agent_config.provider = provider
            agent_config.model = model_id
        else
            -- No "provider/" prefix: forward the identifier unchanged rather
            -- than silently discarding the caller's model choice.
            agent_config.model = self.model
        end
    end

    self.agent = Agent(agent_config)
end
|
|
54
|
+
|
|
55
|
+
--- Build the system prompt that tells the model what to extract.
-- Combines self.prompt with one " - name: type" line per entry of
-- self.fields, followed by fixed rules and a response-format example.
-- @return string the complete system prompt
function LLMExtractor:build_system_prompt()
    -- pairs() visits keys in an unspecified order; sort the names so the same
    -- schema always produces the same prompt text.
    local names = {}
    for name in pairs(self.fields) do
        names[#names + 1] = name
    end
    table.sort(names, function(a, b)
        return tostring(a) < tostring(b)
    end)

    -- Build fields description
    local fields_lines = {}
    for _, name in ipairs(names) do
        fields_lines[#fields_lines + 1] =
            string.format(" - %s: %s", tostring(name), tostring(self.fields[name]))
    end
    local fields_description = table.concat(fields_lines, "\n")

    return string.format([[You are an information extraction assistant. Your task is to extract structured data according to the following instruction:

%s

FIELDS TO EXTRACT:
%s

IMPORTANT RULES:
1. You MUST respond with a valid JSON object containing the extracted fields.
2. Include ONLY the specified fields in your response.
3. Use null for fields that cannot be extracted from the input.
4. For "number" fields, return numeric values (not strings).
5. For "list" fields, return JSON arrays.
6. For "boolean" fields, return true or false.
7. Do NOT include any explanation or text outside the JSON.

RESPONSE FORMAT:
{
"field1": "extracted value",
"field2": 123,
...
}]], self.prompt, fields_description)
end
|
|
86
|
+
|
|
87
|
+
--- Locate and decode the JSON object embedded in an LLM response.
-- Takes the span from the first "{" to the last "}" so that surrounding
-- explanation text or code fences are ignored.
-- @param response string raw model output
-- @return the decoded value and an empty error list on success;
--         nil and a one-element list of error messages on failure
function LLMExtractor:parse_json(response)
    if not response or response == "" then
        return nil, {"Empty response"}
    end

    -- Plain (non-pattern) searches: first "{" from the front, last "}" found
    -- by scanning the reversed string.
    local json_start = response:find("{", 1, true)
    local reversed_end = response:reverse():find("}", 1, true)

    if not json_start or not reversed_end then
        return nil, {"No JSON object found in response"}
    end

    -- Convert the position in the reversed string back to a forward index.
    local json_end = #response - reversed_end + 1
    if json_end < json_start then
        -- The only "}" precedes the first "{": there is no enclosed object.
        return nil, {"No JSON object found in response"}
    end

    local json_str = response:sub(json_start, json_end)

    -- Decode with the tactus.io.json module required at the top of the file;
    -- pcall turns a decoder error into a reportable message.
    local success, parsed = pcall(json.decode, json_str)

    if not success then
        return nil, {"Invalid JSON: " .. tostring(parsed)}
    end

    if parsed == nil then
        -- Never report success with nothing to validate.
        return nil, {"Invalid JSON: decoder returned no value"}
    end

    return parsed, {}
end
|
|
114
|
+
|
|
115
|
+
--- Extract the configured fields from `input_text`.
-- Sends the text to the agent, parses the reply as JSON and validates it
-- against self.fields. On a parse or validation failure the agent is asked
-- again with feedback, up to self.max_retries additional attempts.
-- @param input_text string text to extract from
-- @return table with `fields`, `retry_count`, `raw_response` and
--         `validation_errors`; on failure `fields` is empty and `error`
--         holds a summary message
function LLMExtractor:extract(input_text)
    local retry_count = 0
    local last_response = nil
    local validation_errors = {}

    -- One initial attempt plus max_retries retries.
    for attempt = 1, self.max_retries + 1 do
        -- Build message for this attempt
        local message
        if attempt == 1 then
            message = "Please extract the following information:\n\n" .. input_text
        else
            -- Retry with feedback
            retry_count = retry_count + 1
            message = self:build_retry_feedback(last_response, validation_errors)
        end

        -- Call agent; a missing result or output counts as an empty response
        -- so the retry path handles it instead of raising here.
        local agent_result = self.agent({message = message})
        last_response = (agent_result and agent_result.output) or ""

        -- Parse and validate response
        local parsed, parse_errors = self:parse_json(last_response)

        if #parse_errors > 0 then
            validation_errors = parse_errors
        else
            -- Validate extracted fields against schema
            local result, val_errors = self:validate_fields(parsed, self.fields)
            validation_errors = val_errors

            if #validation_errors == 0 then
                return {
                    fields = result,
                    retry_count = retry_count,
                    raw_response = last_response,
                    -- Present on success too, matching the documented
                    -- result format shared by all extractors.
                    validation_errors = {}
                }
            end
        end
    end

    -- All retries exhausted
    return {
        fields = {},
        error = string.format("Max retries (%d) exceeded. Validation errors: %s",
            self.max_retries, table.concat(validation_errors, ", ")),
        retry_count = retry_count,
        validation_errors = validation_errors,
        raw_response = last_response
    }
end
|
|
165
|
+
|
|
166
|
+
--- Compose the follow-up message sent after a failed attempt.
-- Shows the model its previous reply (cut to 500 bytes), the list of
-- problems found, and the field names it has to return.
-- @param last_response string the reply that failed parsing or validation
-- @param errors table list of error messages
-- @return string the feedback message
function LLMExtractor:build_retry_feedback(last_response, errors)
    local problem_list = table.concat(errors, "\n - ")

    -- Quote each expected field name for the reminder line.
    local quoted_names = {}
    for field_name in pairs(self.fields) do
        quoted_names[#quoted_names + 1] = '"' .. field_name .. '"'
    end
    local expected_fields = table.concat(quoted_names, ", ")

    -- Keep the echoed reply short so the feedback message stays small.
    local preview = last_response
    if #preview > 500 then
        preview = preview:sub(1, 500) .. "..."
    end

    local template = [[Your previous response was not valid JSON or had validation errors.

Previous response:
%s

Errors:
- %s

Please respond with ONLY a valid JSON object containing these fields: %s

Do NOT include any explanation or text outside the JSON object.]]

    return string.format(template, preview, problem_list, expected_fields)
end
|
|
193
|
+
|
|
194
|
+
-- __call entry: forwards a call on an extractor to extract().
LLMExtractor.__call = function(self, text)
    return self:extract(text)
end
|
|
197
|
+
|
|
198
|
+
-- Export LLMExtractor
|
|
199
|
+
-- Module result: a table holding the class under its own name, so callers
-- write require("tactus.extract.llm").LLMExtractor.
local exports = {}
exports.LLMExtractor = LLMExtractor
return exports
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
--[[doc
|
|
2
|
+
# Extraction Classes
|
|
3
|
+
|
|
4
|
+
Proper Lua class hierarchy for structured data extraction:
|
|
5
|
+
|
|
6
|
+
- **BaseExtractor**: Abstract base class with field validation
|
|
7
|
+
- **LLMExtractor**: LLM-based extraction with retry logic
|
|
8
|
+
|
|
9
|
+
## Usage
|
|
10
|
+
|
|
11
|
+
```lua
|
|
12
|
+
-- Import extraction classes
|
|
13
|
+
local extract = require("tactus.extract")
|
|
14
|
+
local LLMExtractor = extract.LLMExtractor
|
|
15
|
+
|
|
16
|
+
-- Or load specific extractors (dependencies auto-load):
|
|
17
|
+
local LLMExtractor = require("tactus.extract.llm").LLMExtractor
|
|
18
|
+
|
|
19
|
+
-- LLM Extraction
|
|
20
|
+
local extractor = LLMExtractor:new {
|
|
21
|
+
fields = {name = "string", age = "number", email = "string"},
|
|
22
|
+
prompt = "Extract customer information from this text",
|
|
23
|
+
model = "openai/gpt-4o-mini"
|
|
24
|
+
}
|
|
25
|
+
local result = extractor:extract("John Smith is 34 years old. Contact: john@example.com")
|
|
26
|
+
-- result.fields = {name = "John Smith", age = 34, email = "john@example.com"}
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## LLMExtractor Parameters
|
|
30
|
+
|
|
31
|
+
- `fields` (required): Table mapping field names to types
|
|
32
|
+
- `prompt` (required): Extraction instruction
|
|
33
|
+
- `model`: Model identifier (e.g., "openai/gpt-4o-mini")
|
|
34
|
+
- `temperature`: LLM temperature (default: 0.3)
|
|
35
|
+
- `max_retries`: Maximum retry attempts (default: 3)
|
|
36
|
+
- `strict`: Require all fields (default: true)
|
|
37
|
+
|
|
38
|
+
## Field Types
|
|
39
|
+
|
|
40
|
+
- `string`: Text values
|
|
41
|
+
- `number`: Numeric values (float)
|
|
42
|
+
- `integer`: Whole numbers
|
|
43
|
+
- `boolean`: true/false values
|
|
44
|
+
- `list`/`array`: JSON arrays
|
|
45
|
+
- `object`/`dict`: JSON objects
|
|
46
|
+
]]
|
|
47
|
+
|
|
48
|
+
-- Load extraction classes
|
|
49
|
+
local extract = require("tactus.extract")
|
|
50
|
+
local LLMExtractor = extract.LLMExtractor
|
|
51
|
+
|
|
52
|
+
-- Local state for test context
|
|
53
|
+
-- Shared by the step definitions below: they store extractor_config, the
-- lazily created extractor, and the most recent extraction result here.
local test_state = {}
|
|
54
|
+
|
|
55
|
+
-- Custom step definitions
|
|
56
|
+
-- Given-step: parse a field list such as "name:string, age:number" and
-- store a new extractor configuration for the scenario.
Step("an LLM extractor with fields (.+)", function(ctx, fields_str)
    -- Strip leading and trailing whitespace; the parentheses drop gsub's
    -- second return value (the substitution count).
    local function trim(s)
        return (s:gsub("^%s+", ""):gsub("%s+$", ""))
    end

    local fields = {}
    -- Parse field definitions like: name:string, age:number
    for field_def in string.gmatch(fields_str, "([^,]+)") do
        local name, type_ = trim(field_def):match("([^:]+):([^:]+)")
        if name and type_ then
            fields[trim(name)] = trim(type_)
        end
    end

    -- test_state lives for the whole file, and the "I extract from" step only
    -- builds an extractor when none is cached. Drop anything left over from a
    -- previous scenario so this scenario's fields are actually used.
    test_state.extractor = nil
    test_state.result = nil

    test_state.extractor_config = {
        fields = fields,
        model = "openai/gpt-4o-mini"
    }
end)
|
|
71
|
+
|
|
72
|
+
-- Given-step: attach the extraction instruction to the pending configuration.
Step("extraction prompt \"(.+)\"", function(ctx, prompt)
    local pending_config = test_state.extractor_config
    pending_config.prompt = prompt
end)
|
|
75
|
+
|
|
76
|
+
-- When-step: build the extractor on first use, run it on the quoted text and
-- keep the result for the Then-steps.
Step("I extract from \"(.+)\"", function(ctx, text)
    test_state.extractor = test_state.extractor
        or LLMExtractor:new(test_state.extractor_config)
    test_state.result = test_state.extractor:extract(text)
end)
|
|
82
|
+
|
|
83
|
+
-- Then-step: compare the string form of an extracted field with the quoted
-- expected value.
Step("the extracted field \"(.+)\" should be \"(.+)\"", function(ctx, field, expected)
    local outcome = test_state.result
    assert(outcome, "No extraction result found")
    assert(outcome.fields, "No fields in extraction result")

    local actual_text = tostring(outcome.fields[field])
    if actual_text ~= expected then
        -- Level 0 keeps the message free of position info, as assert does.
        error("Expected field '" .. field .. "' to be '" .. expected
            .. "' but got '" .. actual_text .. "'", 0)
    end
end)
|
|
90
|
+
|
|
91
|
+
-- Then-step: require an extracted field to be a Lua number equal to the
-- expected numeric literal.
Step("the extracted field \"(.+)\" should be number (.+)", function(ctx, field, expected)
    local outcome = test_state.result
    assert(outcome, "No extraction result found")
    assert(outcome.fields, "No fields in extraction result")

    local value = outcome.fields[field]
    local value_type = type(value)
    if value_type ~= "number" then
        -- Level 0 keeps the message free of position info, as assert does.
        error("Expected field '" .. field .. "' to be a number but got " .. value_type, 0)
    end
    if value ~= tonumber(expected) then
        error("Expected field '" .. field .. "' to be " .. expected
            .. " but got " .. tostring(value), 0)
    end
end)
|
|
100
|
+
|
|
101
|
+
-- Then-step: pass when a result exists and it carries no error value.
Step("the extraction should succeed", function(ctx)
    local outcome = test_state.result
    assert(outcome, "No extraction result found")

    local failure = outcome.error
    if failure then
        -- Level 0 keeps the message free of position info, as assert does.
        error("Extraction failed with error: " .. tostring(failure), 0)
    end
end)
|
|
106
|
+
|
|
107
|
+
-- Then-step: pass when the result's validation_errors list is absent or empty.
Step("the extraction should have no validation errors", function(ctx)
    local outcome = test_state.result
    assert(outcome, "No extraction result found")

    local problems = outcome.validation_errors or {}
    if #problems ~= 0 then
        -- Level 0 keeps the message free of position info, as assert does.
        error("Expected no validation errors but got: " .. table.concat(problems, ", "), 0)
    end
end)
|
|
113
|
+
|
|
114
|
+
-- BDD Specifications
|
|
115
|
+
Specification([[
|
|
116
|
+
Feature: Extraction Class Hierarchy
|
|
117
|
+
As a Tactus developer
|
|
118
|
+
I want to extract structured data from text
|
|
119
|
+
So that I can process unstructured information programmatically
|
|
120
|
+
|
|
121
|
+
Scenario: Extract simple contact information
|
|
122
|
+
Given an LLM extractor with fields name:string, age:number
|
|
123
|
+
And extraction prompt "Extract the person's name and age"
|
|
124
|
+
When I extract from "John Smith is 34 years old"
|
|
125
|
+
Then the extraction should succeed
|
|
126
|
+
And the extracted field "name" should be "John Smith"
|
|
127
|
+
And the extracted field "age" should be number 34
|
|
128
|
+
|
|
129
|
+
Scenario: Extract multiple string fields
|
|
130
|
+
Given an LLM extractor with fields city:string, country:string
|
|
131
|
+
And extraction prompt "Extract the city and country"
|
|
132
|
+
When I extract from "The meeting will be held in Paris, France"
|
|
133
|
+
Then the extraction should succeed
|
|
134
|
+
And the extracted field "city" should be "Paris"
|
|
135
|
+
And the extracted field "country" should be "France"
|
|
136
|
+
|
|
137
|
+
Scenario: Extract with validation
|
|
138
|
+
Given an LLM extractor with fields product:string, price:number, quantity:integer
|
|
139
|
+
And extraction prompt "Extract product details"
|
|
140
|
+
When I extract from "Order: 5 widgets at $19.99 each"
|
|
141
|
+
Then the extraction should succeed
|
|
142
|
+
And the extraction should have no validation errors
|
|
143
|
+
]])
|
|
144
|
+
|
|
145
|
+
-- Minimal procedure
|
|
146
|
+
Procedure {
    -- Declares a single required string output named `result`.
    output = {
        result = field.string{required = true}
    },
    -- The procedure body ignores its input and returns a fixed marker.
    function(input)
        local summary = "Extraction class hierarchy specs executed"
        return {result = summary}
    end
}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
-- Base Generator Module
|
|
2
|
+
--
|
|
3
|
+
-- Provides the BaseGenerator abstract class and class helper for
|
|
4
|
+
-- building custom generator implementations.
|
|
5
|
+
--
|
|
6
|
+
-- Inspired by DSPy's modular generation approach:
|
|
7
|
+
-- - Configurable generation parameters
|
|
8
|
+
-- - Optional chain-of-thought reasoning
|
|
9
|
+
-- - Output format control
|
|
10
|
+
-- - Retry logic for robustness
|
|
11
|
+
|
|
12
|
+
-- ============================================================================
|
|
13
|
+
-- Class Helper (same pattern as classify/extract modules)
|
|
14
|
+
-- ============================================================================
|
|
15
|
+
|
|
16
|
+
--- Create a class table, optionally derived from `base`.
-- The class is the metatable of its instances and carries a `new`
-- constructor, which calls `init(config)` when one is defined (on the class
-- or inherited).
-- @param base table|nil parent class
-- @return table the new class
local function class(base)
    local cls = {}
    cls.__index = cls

    if base then
        -- Ordinary methods are inherited through the __index chain.
        setmetatable(cls, {__index = base})

        -- Metamethods (__call, __tostring, ...) are looked up raw on the
        -- instance's metatable, so the __index chain is never consulted for
        -- them. Copy the ones the base defines so subclasses keep them; a
        -- subclass can still override by defining its own afterwards.
        for key, value in pairs(base) do
            if type(key) == "string" and key:sub(1, 2) == "__" and key ~= "__index" then
                cls[key] = value
            end
        end
    end

    function cls:new(config)
        local instance = setmetatable({}, cls)
        if instance.init then
            -- init always receives a table, even when new() gets no argument.
            instance:init(config or {})
        end
        return instance
    end

    return cls
end
|
|
34
|
+
|
|
35
|
+
-- ============================================================================
|
|
36
|
+
-- BaseGenerator
|
|
37
|
+
-- ============================================================================
|
|
38
|
+
|
|
39
|
+
-- Root class of the generator hierarchy; created without a parent class.
local BaseGenerator = class()
|
|
40
|
+
|
|
41
|
+
--- Copy generator settings from `config` onto the instance.
-- Defaults: temperature 0.7, max_retries 2, reasoning false,
-- output_format "text". All other keys are stored as given (possibly nil).
function BaseGenerator:init(config)
    -- Identity and model selection
    self.name, self.model = config.name, config.model

    -- Sampling and retry settings
    local temperature = config.temperature
    if not temperature then
        temperature = 0.7
    end
    self.temperature = temperature
    self.max_tokens = config.max_tokens

    local retries = config.max_retries
    if not retries then
        retries = 2
    end
    self.max_retries = retries

    -- Generation options (DSPy-inspired)
    local use_reasoning = config.reasoning
    if not use_reasoning then
        use_reasoning = false
    end
    self.reasoning = use_reasoning -- Chain-of-thought mode

    local format = config.output_format
    if not format then
        format = "text"
    end
    self.output_format = format -- "text", "json", "markdown"
    self.constraints = config.constraints -- Output constraints (optional)

    -- Prompt pieces consumed by build_system_prompt()
    self.system_prompt, self.instructions = config.system_prompt, config.instructions
end
|
|
58
|
+
|
|
59
|
+
-- Abstract hook: always raises; concrete generators supply their own version.
BaseGenerator.generate = function(self, prompt)
    local message = "BaseGenerator:generate() must be implemented by subclass"
    error(message)
end
|
|
62
|
+
|
|
63
|
+
-- __call entry: forwards a call on a generator to generate().
BaseGenerator.__call = function(self, prompt)
    return self:generate(prompt)
end
|
|
66
|
+
|
|
67
|
+
-- ============================================================================
|
|
68
|
+
-- Helper functions for subclasses
|
|
69
|
+
-- ============================================================================
|
|
70
|
+
|
|
71
|
+
-- Build system prompt
|
|
72
|
+
-- Note: Reasoning is handled by DSPy's ChainOfThought module, not manual prompts
|
|
73
|
+
--- Assemble the system prompt from the generator's settings.
-- Sections, joined by blank lines, in order: the base system prompt (or a
-- generic default), an output-format instruction for "json" / "markdown",
-- the custom instructions, and the constraints.
-- Reasoning is deliberately not mentioned here; the notes in this file say it
-- is handled by DSPy's ChainOfThought module rather than by prompt text.
-- @return string the complete system prompt
function BaseGenerator:build_system_prompt()
    local parts = {}

    -- Base system prompt
    parts[#parts + 1] = self.system_prompt or "You are a helpful assistant."

    -- Add output format instructions ("text" needs none)
    if self.output_format == "json" then
        parts[#parts + 1] = "Respond with valid JSON only. No markdown formatting or code blocks."
    elseif self.output_format == "markdown" then
        parts[#parts + 1] = "Format your response using Markdown."
    end

    -- Add custom instructions
    if self.instructions then
        parts[#parts + 1] = self.instructions
    end

    -- Add constraints. An empty list or empty string is skipped so the prompt
    -- never ends with a dangling "Constraints: " header, and items are passed
    -- through tostring so non-string entries cannot make table.concat fail.
    local constraints = self.constraints
    if type(constraints) == "table" then
        if #constraints > 0 then
            local items = {}
            for i = 1, #constraints do
                items[i] = tostring(constraints[i])
            end
            parts[#parts + 1] = "Constraints: " .. table.concat(items, ", ")
        end
    elseif constraints and constraints ~= "" then
        parts[#parts + 1] = "Constraints: " .. tostring(constraints)
    end

    return table.concat(parts, "\n\n")
end
|
|
110
|
+
|
|
111
|
+
-- Parse response to extract reasoning and final response
|
|
112
|
+
--- Split a model reply into its reasoning and final answer.
-- When self.reasoning is enabled and the reply contains both a "REASONING:"
-- and a "RESPONSE:" section, the two parts are returned separately with
-- surrounding whitespace removed. Otherwise the reply is returned unchanged
-- with `reasoning = nil`.
-- @param response raw model output
-- @return table {response = ..., reasoning = string|nil}
function BaseGenerator:parse_reasoning_response(response)
    -- Nothing to split when reasoning is off; a non-string reply is passed
    -- through as well instead of failing on the pattern match below.
    if not self.reasoning or type(response) ~= "string" then
        return {
            response = response,
            reasoning = nil
        }
    end

    -- The parentheses drop gsub's second return value (the substitution count).
    local function trim(s)
        return (s:gsub("^%s+", ""):gsub("%s+$", ""))
    end

    -- Try to extract REASONING and RESPONSE sections
    local reasoning = response:match("REASONING:%s*(.-)%s*RESPONSE:")
    local final_response = response:match("RESPONSE:%s*(.*)$")

    if reasoning and final_response then
        return {
            response = trim(final_response),
            reasoning = trim(reasoning)
        }
    end

    -- If pattern doesn't match, return whole response
    return {
        response = response,
        reasoning = nil
    }
end
|
|
137
|
+
|
|
138
|
+
-- Export BaseGenerator and class helper
|
|
139
|
+
-- Module result: the abstract generator class plus the class helper used to
-- derive new generator classes from it.
local exports = {}
exports.BaseGenerator = BaseGenerator
exports.class = class
return exports
|