tactus 0.32.2__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tactus/__init__.py +1 -1
- tactus/adapters/__init__.py +18 -1
- tactus/adapters/broker_log.py +127 -34
- tactus/adapters/channels/__init__.py +153 -0
- tactus/adapters/channels/base.py +174 -0
- tactus/adapters/channels/broker.py +179 -0
- tactus/adapters/channels/cli.py +448 -0
- tactus/adapters/channels/host.py +225 -0
- tactus/adapters/channels/ipc.py +297 -0
- tactus/adapters/channels/sse.py +305 -0
- tactus/adapters/cli_hitl.py +223 -1
- tactus/adapters/control_loop.py +879 -0
- tactus/adapters/file_storage.py +35 -2
- tactus/adapters/ide_log.py +7 -1
- tactus/backends/http_backend.py +0 -1
- tactus/broker/client.py +31 -1
- tactus/broker/server.py +416 -92
- tactus/cli/app.py +270 -7
- tactus/cli/control.py +393 -0
- tactus/core/config_manager.py +33 -6
- tactus/core/dsl_stubs.py +102 -18
- tactus/core/execution_context.py +265 -8
- tactus/core/lua_sandbox.py +8 -9
- tactus/core/registry.py +19 -2
- tactus/core/runtime.py +235 -27
- tactus/docker/Dockerfile.pypi +49 -0
- tactus/docs/__init__.py +33 -0
- tactus/docs/extractor.py +326 -0
- tactus/docs/html_renderer.py +72 -0
- tactus/docs/models.py +121 -0
- tactus/docs/templates/base.html +204 -0
- tactus/docs/templates/index.html +58 -0
- tactus/docs/templates/module.html +96 -0
- tactus/dspy/agent.py +382 -22
- tactus/dspy/broker_lm.py +57 -6
- tactus/dspy/config.py +14 -3
- tactus/dspy/history.py +2 -1
- tactus/dspy/module.py +136 -11
- tactus/dspy/signature.py +0 -1
- tactus/ide/server.py +300 -9
- tactus/primitives/human.py +619 -47
- tactus/primitives/system.py +0 -1
- tactus/protocols/__init__.py +25 -0
- tactus/protocols/control.py +427 -0
- tactus/protocols/notification.py +207 -0
- tactus/sandbox/container_runner.py +79 -11
- tactus/sandbox/docker_manager.py +23 -0
- tactus/sandbox/entrypoint.py +26 -0
- tactus/sandbox/protocol.py +3 -0
- tactus/stdlib/README.md +77 -0
- tactus/stdlib/__init__.py +27 -1
- tactus/stdlib/classify/__init__.py +165 -0
- tactus/stdlib/classify/classify.spec.tac +195 -0
- tactus/stdlib/classify/classify.tac +257 -0
- tactus/stdlib/classify/fuzzy.py +282 -0
- tactus/stdlib/classify/llm.py +319 -0
- tactus/stdlib/classify/primitive.py +287 -0
- tactus/stdlib/core/__init__.py +57 -0
- tactus/stdlib/core/base.py +320 -0
- tactus/stdlib/core/confidence.py +211 -0
- tactus/stdlib/core/models.py +161 -0
- tactus/stdlib/core/retry.py +171 -0
- tactus/stdlib/core/validation.py +274 -0
- tactus/stdlib/extract/__init__.py +125 -0
- tactus/stdlib/extract/llm.py +330 -0
- tactus/stdlib/extract/primitive.py +256 -0
- tactus/stdlib/tac/tactus/classify/base.tac +51 -0
- tactus/stdlib/tac/tactus/classify/fuzzy.tac +87 -0
- tactus/stdlib/tac/tactus/classify/index.md +77 -0
- tactus/stdlib/tac/tactus/classify/init.tac +29 -0
- tactus/stdlib/tac/tactus/classify/llm.tac +150 -0
- tactus/stdlib/tac/tactus/classify.spec.tac +191 -0
- tactus/stdlib/tac/tactus/extract/base.tac +138 -0
- tactus/stdlib/tac/tactus/extract/index.md +96 -0
- tactus/stdlib/tac/tactus/extract/init.tac +27 -0
- tactus/stdlib/tac/tactus/extract/llm.tac +201 -0
- tactus/stdlib/tac/tactus/extract.spec.tac +153 -0
- tactus/stdlib/tac/tactus/generate/base.tac +142 -0
- tactus/stdlib/tac/tactus/generate/index.md +195 -0
- tactus/stdlib/tac/tactus/generate/init.tac +28 -0
- tactus/stdlib/tac/tactus/generate/llm.tac +169 -0
- tactus/stdlib/tac/tactus/generate.spec.tac +210 -0
- tactus/testing/behave_integration.py +171 -7
- tactus/testing/context.py +0 -1
- tactus/testing/evaluation_runner.py +0 -1
- tactus/testing/gherkin_parser.py +0 -1
- tactus/testing/mock_hitl.py +0 -1
- tactus/testing/mock_tools.py +0 -1
- tactus/testing/models.py +0 -1
- tactus/testing/steps/builtin.py +0 -1
- tactus/testing/steps/custom.py +81 -22
- tactus/testing/steps/registry.py +0 -1
- tactus/testing/test_runner.py +7 -1
- tactus/validation/semantic_visitor.py +11 -5
- tactus/validation/validator.py +0 -1
- {tactus-0.32.2.dist-info → tactus-0.34.0.dist-info}/METADATA +14 -2
- {tactus-0.32.2.dist-info → tactus-0.34.0.dist-info}/RECORD +100 -49
- {tactus-0.32.2.dist-info → tactus-0.34.0.dist-info}/WHEEL +0 -0
- {tactus-0.32.2.dist-info → tactus-0.34.0.dist-info}/entry_points.txt +0 -0
- {tactus-0.32.2.dist-info → tactus-0.34.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
-- Fuzzy String Matching Classification
|
|
2
|
+
--
|
|
3
|
+
-- Provides string similarity-based classification:
|
|
4
|
+
-- - Configurable similarity threshold
|
|
5
|
+
-- - Case-insensitive matching
|
|
6
|
+
-- - Character overlap similarity algorithm
|
|
7
|
+
|
|
8
|
+
-- Load dependencies
|
|
9
|
+
local base = require("tactus.classify.base")
|
|
10
|
+
local BaseClassifier = base.BaseClassifier
|
|
11
|
+
local class = base.class
|
|
12
|
+
|
|
13
|
+
-- ============================================================================
|
|
14
|
+
-- FuzzyMatchClassifier
|
|
15
|
+
-- ============================================================================
|
|
16
|
+
|
|
17
|
+
local FuzzyMatchClassifier = class(BaseClassifier)
|
|
18
|
+
|
|
19
|
+
--- Set up a fuzzy classifier from its configuration table.
-- Base-class initialisation runs first; the fuzzy-specific settings are then
-- copied from `config` onto the instance.
-- @param config table with:
--   expected  (required) value every input is compared against
--   threshold (optional) minimum similarity for the first class; default 0.8
--   classes   (optional) {label_when_matched, label_otherwise};
--             default {"Yes", "No"}
function FuzzyMatchClassifier:init(config)
    BaseClassifier.init(self, config)

    local target = config.expected
    if not target then
        -- Level 0 keeps the message free of position info, as assert() does.
        error("FuzzyMatchClassifier requires 'expected' field", 0)
    end
    self.expected = target

    local cutoff = config.threshold
    if not cutoff then
        cutoff = 0.8
    end
    self.threshold = cutoff

    local labels = config.classes
    if not labels then
        labels = {"Yes", "No"}
    end
    self.classes = labels
end
|
|
28
|
+
|
|
29
|
+
--- Score how similar two strings are, ignoring case.
--
-- Scoring rules, applied in order after lower-casing both inputs:
--   1. identical strings                      -> 1.0
--   2. either string is empty                 -> 0.0
--   3. one string contains the other verbatim -> 0.85
--   4. otherwise the Jaccard index of the two sets of distinct bytes
--      (shared distinct bytes / total distinct bytes), always in [0, 1].
--
-- @param s1 string first string
-- @param s2 string second string
-- @return number similarity in the range 0.0 .. 1.0
function FuzzyMatchClassifier:calculate_similarity(s1, s2)
    s1 = s1:lower()
    s2 = s2:lower()

    if s1 == s2 then
        return 1.0
    end

    -- A plain find() locates the empty string inside any string, which would
    -- otherwise score an empty input as a 0.85 "substring" match.
    if s1 == "" or s2 == "" then
        return 0.0
    end

    if s1:find(s2, 1, true) or s2:find(s1, 1, true) then
        return 0.85
    end

    -- Character overlap similarity over DISTINCT bytes.
    local set1 = {}
    local union = 0
    for i = 1, #s1 do
        local char = s1:sub(i, i)
        if not set1[char] then
            set1[char] = true
            union = union + 1
        end
    end

    local intersection = 0
    local seen2 = {}
    for i = 1, #s2 do
        local char = s2:sub(i, i)
        -- Count each distinct byte of s2 once; counting every occurrence
        -- lets repeated letters push the ratio above 1.0.
        if not seen2[char] then
            seen2[char] = true
            if set1[char] then
                intersection = intersection + 1
            else
                union = union + 1
            end
        end
    end

    -- Both strings are non-empty here, so union is at least 1.
    return intersection / union
end
|
|
71
|
+
|
|
72
|
+
--- Classify `input_text` by its similarity to the configured expected string.
-- The first entry of `self.classes` is chosen when the similarity reaches
-- `self.threshold`; otherwise (or when that entry is nil/false) the second
-- entry is used.
-- @param input_text string text to compare against `self.expected`
-- @return table with value, confidence (the similarity score),
--         matched_text (the expected string) and retry_count (always 0)
function FuzzyMatchClassifier:classify(input_text)
    local score = self:calculate_similarity(input_text, self.expected)
    local reached = score >= self.threshold

    local label
    if reached then
        label = self.classes[1]
    end
    if not label then
        label = self.classes[2]
    end

    local outcome = {}
    outcome.value = label
    outcome.confidence = score
    outcome.matched_text = self.expected -- what the input was compared against
    outcome.retry_count = 0
    return outcome
end
|
|
83
|
+
|
|
84
|
+
-- Export FuzzyMatchClassifier
|
|
85
|
+
return {
|
|
86
|
+
FuzzyMatchClassifier = FuzzyMatchClassifier,
|
|
87
|
+
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Classification Module
|
|
2
|
+
|
|
3
|
+
The `tactus.classify` module provides a comprehensive classification system with support for both LLM-based and fuzzy string matching approaches.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
All classifiers extend `BaseClassifier` and share a common interface, making them interchangeable. This enables you to switch between LLM and fuzzy matching without changing your code structure.
|
|
8
|
+
|
|
9
|
+
## When to Use
|
|
10
|
+
|
|
11
|
+
- **LLMClassifier**: Use when you need semantic understanding and context-aware classification. Ideal for ambiguous cases where the answer isn't just string matching.
|
|
12
|
+
|
|
13
|
+
- **FuzzyMatchClassifier**: Use when you're matching against known expected values with typo tolerance. Much faster than LLM calls and doesn't require API access.
|
|
14
|
+
|
|
15
|
+
## Architecture
|
|
16
|
+
|
|
17
|
+
The module uses a proper Lua class hierarchy:
|
|
18
|
+
|
|
19
|
+
- `BaseClassifier` - Abstract base with common interface
|
|
20
|
+
- `LLMClassifier` - LLM-powered classification with automatic retry logic
|
|
21
|
+
- `FuzzyMatchClassifier` - String similarity matching with configurable thresholds
|
|
22
|
+
|
|
23
|
+
All classifiers return a consistent result format:
|
|
24
|
+
|
|
25
|
+
```lua
|
|
26
|
+
{
|
|
27
|
+
value = "Yes", -- Classification result
|
|
28
|
+
confidence = 0.85, -- Confidence score (0.0-1.0)
|
|
29
|
+
retry_count = 0, -- Number of retries needed
|
|
30
|
+
matched_text = "yes", -- Expected string the input was compared against (fuzzy only)
|
|
31
|
+
raw_response = "..." -- LLM response (LLM only)
|
|
32
|
+
}
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Loading the Module
|
|
36
|
+
|
|
37
|
+
```lua
|
|
38
|
+
-- Load the main module
|
|
39
|
+
local classify = require("tactus.classify")
|
|
40
|
+
|
|
41
|
+
-- Or load specific classifiers (dependencies auto-load)
|
|
42
|
+
local LLMClassifier = require("tactus.classify.llm").LLMClassifier
|
|
43
|
+
local FuzzyMatchClassifier = require("tactus.classify.fuzzy").FuzzyMatchClassifier
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Performance Notes
|
|
47
|
+
|
|
48
|
+
- LLM classification typically takes 1-3 seconds per call
|
|
49
|
+
- Fuzzy matching is nearly instantaneous (<1ms)
|
|
50
|
+
- Consider caching LLM results for repeated classifications
|
|
51
|
+
- Fuzzy matching works offline and requires no API keys
|
|
52
|
+
|
|
53
|
+
## Extending Classifiers
|
|
54
|
+
|
|
55
|
+
You can extend `BaseClassifier` to create custom classifiers:
|
|
56
|
+
|
|
57
|
+
```lua
|
|
58
|
+
local base = require("tactus.classify.base")
|
|
59
|
+
local class = base.class
|
|
60
|
+
local BaseClassifier = base.BaseClassifier
|
|
61
|
+
|
|
62
|
+
local MyClassifier = class(BaseClassifier)
|
|
63
|
+
|
|
64
|
+
function MyClassifier:init(config)
|
|
65
|
+
BaseClassifier.init(self, config)
|
|
66
|
+
-- Your initialization
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
function MyClassifier:classify(text)
|
|
70
|
+
-- Your classification logic
|
|
71
|
+
return {
|
|
72
|
+
value = "Yes",
|
|
73
|
+
confidence = 1.0,
|
|
74
|
+
retry_count = 0
|
|
75
|
+
}
|
|
76
|
+
end
|
|
77
|
+
```
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
-- Tactus Classification Module
|
|
2
|
+
--
|
|
3
|
+
-- Provides a comprehensive classification system with:
|
|
4
|
+
-- - LLM-based classification (tactus.classify.llm)
|
|
5
|
+
-- - Fuzzy string matching (tactus.classify.fuzzy)
|
|
6
|
+
-- - Extensible base class (tactus.classify.base)
|
|
7
|
+
--
|
|
8
|
+
-- Usage:
|
|
9
|
+
-- local classify = require("tactus.classify")
|
|
10
|
+
-- local classifier = classify.LLMClassifier:new{...}
|
|
11
|
+
--
|
|
12
|
+
-- Or load specific classifiers:
|
|
13
|
+
-- local LLMClassifier = require("tactus.classify.llm").LLMClassifier
|
|
14
|
+
|
|
15
|
+
-- Load all submodules
|
|
16
|
+
local base = require("tactus.classify.base")
|
|
17
|
+
local llm = require("tactus.classify.llm")
|
|
18
|
+
local fuzzy = require("tactus.classify.fuzzy")
|
|
19
|
+
|
|
20
|
+
-- Re-export all classes
|
|
21
|
+
return {
|
|
22
|
+
-- Core classes
|
|
23
|
+
BaseClassifier = base.BaseClassifier,
|
|
24
|
+
LLMClassifier = llm.LLMClassifier,
|
|
25
|
+
FuzzyMatchClassifier = fuzzy.FuzzyMatchClassifier,
|
|
26
|
+
|
|
27
|
+
-- Helper for users who want to extend
|
|
28
|
+
class = base.class,
|
|
29
|
+
}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
-- LLM-Based Classification
|
|
2
|
+
--
|
|
3
|
+
-- Provides LLM-powered text classification with:
|
|
4
|
+
-- - Retry logic for invalid responses
|
|
5
|
+
-- - Multiple class support
|
|
6
|
+
-- - Configurable confidence modes
|
|
7
|
+
-- - Response parsing with fallbacks
|
|
8
|
+
|
|
9
|
+
-- Load dependencies
|
|
10
|
+
local base = require("tactus.classify.base")
|
|
11
|
+
local BaseClassifier = base.BaseClassifier
|
|
12
|
+
local class = base.class
|
|
13
|
+
|
|
14
|
+
-- ============================================================================
|
|
15
|
+
-- LLMClassifier
|
|
16
|
+
-- ============================================================================
|
|
17
|
+
|
|
18
|
+
local LLMClassifier = class(BaseClassifier)
|
|
19
|
+
|
|
20
|
+
--- Configure the classifier and create the agent it will query.
--
-- @param config table with:
--   classes         (required) list of valid classification values
--   prompt          (required) classification instruction
--   max_retries     (optional) default 3
--   temperature     (optional) default 0.3
--   model           (optional) "provider/model-id" string; a value without a
--                   "/" matches nothing, so neither provider nor model is
--                   passed to the agent
--   confidence_mode (optional) default "heuristic"
function LLMClassifier:init(config)
    BaseClassifier.init(self, config)

    -- Required fields. Level 0 reproduces assert()'s position-free message.
    if not config.classes then
        error("LLMClassifier requires 'classes' field", 0)
    end
    if not config.prompt then
        error("LLMClassifier requires 'prompt' field", 0)
    end

    self.classes = config.classes
    self.prompt = config.prompt
    self.max_retries = config.max_retries or 3
    self.temperature = config.temperature or 0.3
    self.model = config.model
    self.confidence_mode = config.confidence_mode or "heuristic"

    -- System prompt: the caller's instruction followed by the response rules.
    local template = [[%s

You MUST respond with ONLY one of these values: %s

Response format:
- Start your response with the classification value on its own line
- You may optionally explain your reasoning afterward

Valid values: %s]]
    local allowed = table.concat(self.classes, ", ")
    self.system_prompt = template:format(self.prompt, allowed, allowed)

    -- Split "provider/model-id"; both captures are nil when there is no "/".
    local provider, model_id
    if self.model then
        provider, model_id = self.model:match("([^/]+)/(.+)")
    end

    self.agent = Agent({
        system_prompt = self.system_prompt,
        temperature = self.temperature,
        provider = provider,
        model = model_id,
    })
end
|
|
62
|
+
|
|
63
|
+
--- Map a raw LLM reply onto one of the configured classes.
--
-- Only the first non-blank line of the reply is inspected. Decoration
-- characters (* " ' ` : .) are removed and surrounding whitespace trimmed,
-- then the line is compared case-insensitively against `self.classes`:
--   1. an exact match wins;
--   2. otherwise the longest class name that is a literal prefix of the line
--      wins (ties go to the class listed first).
--
-- @param response string|nil raw reply text
-- @return the matching entry of `self.classes` in its original casing,
--         or nil when nothing matches
function LLMClassifier:parse_response(response)
    if not response or response == "" then
        return nil
    end

    -- Skip leading blank lines so a reply that starts with a newline is still
    -- reduced to its first line; fall back to the whole reply otherwise.
    local first_line = response:match("^%s*([^\n]+)") or response

    -- Clean up formatting
    first_line = first_line:gsub("[%*\"'`:%.]", ""):gsub("^%s+", ""):gsub("%s+$", "")
    local first_line_lower = first_line:lower()

    -- Create case-insensitive lookup
    local value_map = {}
    for _, v in ipairs(self.classes) do
        value_map[v:lower()] = v
    end

    -- Exact match (case-insensitive)
    local exact = value_map[first_line_lower]
    if exact then
        return exact
    end

    -- Prefix match. The class name is compared as plain text, not as a Lua
    -- pattern, so names containing magic characters such as % ( ) - + are
    -- safe. Classes are walked in declaration order and the longest prefix is
    -- kept, so overlapping names ("no" / "not sure") resolve the same way on
    -- every run.
    local best, best_len = nil, 0
    for _, v in ipairs(self.classes) do
        local v_lower = v:lower()
        local len = #v_lower
        if len > best_len and first_line_lower:sub(1, len) == v_lower then
            best, best_len = value_map[v_lower], len
        end
    end

    return best
end
|
|
98
|
+
|
|
99
|
+
--- Classify `input_text` with the agent, retrying on unparseable replies.
--
-- At most `self.max_retries + 1` agent calls are made. The first call sends
-- `input_text`; every later call sends corrective feedback together with the
-- original text, and the reply to that message is the one that gets parsed.
-- Sending feedback, discarding its reply and then re-sending the input would
-- cost two agent calls per retry.
--
-- @param input_text text to classify
-- @return table. On success: value, retry_count, raw_response, plus
--         confidence = 0.8 when `self.confidence_mode` is "heuristic".
--         When every attempt fails: value = "ERROR", error, retry_count,
--         raw_response.
function LLMClassifier:classify(input_text)
    local retry_count = 0
    local last_response = nil
    local message = input_text

    for _ = 1, self.max_retries + 1 do
        -- Call agent; a missing result or output is treated as an empty reply.
        local agent_result = self.agent({message = message})
        last_response = (agent_result and agent_result.output) or ""

        -- Parse classification
        local value = self:parse_response(last_response)

        if value then
            local result = {
                value = value,
                retry_count = retry_count,
                raw_response = last_response
            }

            if self.confidence_mode == "heuristic" then
                result.confidence = 0.8
            end

            return result
        end

        -- Retry: the next attempt carries the feedback. The original text is
        -- repeated so the request is complete even if the agent does not keep
        -- earlier turns.
        retry_count = retry_count + 1
        message = string.format(
            "Your response '%s' is not valid. Please respond with ONLY one of: %s",
            last_response,
            table.concat(self.classes, ", ")
        ) .. "\n\nText to classify:\n" .. tostring(input_text)
    end

    -- All retries exhausted
    return {
        value = "ERROR",
        error = "Failed to get valid classification after " .. self.max_retries .. " retries",
        retry_count = retry_count,
        raw_response = last_response
    }
end
|
|
146
|
+
|
|
147
|
+
-- Export LLMClassifier
|
|
148
|
+
return {
|
|
149
|
+
LLMClassifier = LLMClassifier,
|
|
150
|
+
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
--[[doc
|
|
2
|
+
# Classification Classes
|
|
3
|
+
|
|
4
|
+
Proper Lua class hierarchy for text classification:
|
|
5
|
+
|
|
6
|
+
- **BaseClassifier**: Abstract base class
|
|
7
|
+
- **LLMClassifier**: LLM-based classification with retry logic
|
|
8
|
+
- **FuzzyMatchClassifier**: String similarity matching
|
|
9
|
+
|
|
10
|
+
## Usage
|
|
11
|
+
|
|
12
|
+
```lua
|
|
13
|
+
-- Import classification classes
|
|
14
|
+
local classify = require("tactus.classify")
|
|
15
|
+
local LLMClassifier = classify.LLMClassifier
|
|
16
|
+
local FuzzyMatchClassifier = classify.FuzzyMatchClassifier
|
|
17
|
+
|
|
18
|
+
-- Or load specific classifiers (dependencies auto-load):
|
|
19
|
+
local LLMClassifier = require("tactus.classify.llm").LLMClassifier
|
|
20
|
+
|
|
21
|
+
-- LLM Classification
|
|
22
|
+
local classifier = LLMClassifier:new {
|
|
23
|
+
classes = {"Yes", "No"},
|
|
24
|
+
prompt = "Is this a question?",
|
|
25
|
+
model = "openai/gpt-4o-mini"
|
|
26
|
+
}
|
|
27
|
+
local result = classifier:classify("How are you?")
|
|
28
|
+
|
|
29
|
+
-- Fuzzy Matching
|
|
30
|
+
local fuzzy = FuzzyMatchClassifier:new {
|
|
31
|
+
expected = "hello",
|
|
32
|
+
threshold = 0.8
|
|
33
|
+
}
|
|
34
|
+
local result = fuzzy:classify("helo")
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## LLMClassifier Parameters
|
|
38
|
+
|
|
39
|
+
- `classes` (required): List of valid classification values
|
|
40
|
+
- `prompt` (required): Classification instruction
|
|
41
|
+
- `model`: Model identifier (e.g., "openai/gpt-4o-mini")
|
|
42
|
+
- `temperature`: LLM temperature (default: 0.3)
|
|
43
|
+
- `max_retries`: Maximum retry attempts (default: 3)
|
|
44
|
+
- `confidence_mode`: "heuristic" or "none" (default: "heuristic")
|
|
45
|
+
|
|
46
|
+
## FuzzyMatchClassifier Parameters
|
|
47
|
+
|
|
48
|
+
- `expected` (required): Expected string to match against
|
|
49
|
+
- `threshold`: Similarity threshold 0.0-1.0 (default: 0.8)
|
|
50
|
+
- `classes`: Output values (default: ["Yes", "No"])
|
|
51
|
+
|
|
52
|
+
## Confidence Warning
|
|
53
|
+
|
|
54
|
+
LLM self-assessed confidence is generally unreliable unless calibrated
|
|
55
|
+
for your specific use case. Consider using fuzzy matching or human
|
|
56
|
+
review for high-stakes decisions.
|
|
57
|
+
]]
|
|
58
|
+
|
|
59
|
+
-- Load classification classes
|
|
60
|
+
local classify = require("tactus.classify")
|
|
61
|
+
local LLMClassifier = classify.LLMClassifier
|
|
62
|
+
local FuzzyMatchClassifier = classify.FuzzyMatchClassifier
|
|
63
|
+
|
|
64
|
+
-- Local state for test context
|
|
65
|
+
local test_state = {}
|
|
66
|
+
|
|
67
|
+
-- Custom step definitions
|
|
68
|
+
-- Given-step: remember the configuration for an LLM classifier.
-- Every double-quoted word in the captured text becomes one class, so
-- `"Yes" and "No"` yields {"Yes", "No"}.
Step("an LLM classifier with classes (.+)", function(ctx, classes_str)
    local classes = {}
    for class_name in string.gmatch(classes_str, '"([^"]+)"') do
        classes[#classes + 1] = class_name
    end
    test_state.classifier_config = {
        classes = classes,
        model = "openai/gpt-4o-mini"
    }
    test_state.classifier_type = "llm"

    -- The "I classify" step only builds a classifier when none is cached, so
    -- drop any classifier/result left behind by an earlier scenario; otherwise
    -- this new configuration would be ignored if state survives between
    -- scenarios.
    test_state.classifier = nil
    test_state.result = nil
end)
|
|
79
|
+
|
|
80
|
+
-- And-step: attach the quoted prompt to the configuration created by a
-- preceding classifier step.
Step("prompt \"(.+)\"", function(ctx, prompt)
    local pending = test_state.classifier_config
    pending.prompt = prompt
end)
|
|
83
|
+
|
|
84
|
+
-- Given-step: remember the configuration for a fuzzy classifier that compares
-- inputs against the quoted expected string.
Step("a fuzzy classifier expecting \"(.+)\"", function(ctx, expected)
    test_state.classifier_config = {
        expected = expected
    }
    test_state.classifier_type = "fuzzy"

    -- The "I classify" step only builds a classifier when none is cached, so
    -- drop any classifier/result left behind by an earlier scenario; otherwise
    -- this new configuration would be ignored if state survives between
    -- scenarios.
    test_state.classifier = nil
    test_state.result = nil
end)
|
|
90
|
+
|
|
91
|
+
-- When-step: build the classifier described by the stored type and
-- configuration, and cache it in test_state.classifier. Raises for any type
-- other than "llm" or "fuzzy".
Step("I create the classifier", function(ctx)
    local kind = test_state.classifier_type

    if kind ~= "llm" and kind ~= "fuzzy" then
        error("Unknown classifier type: " .. tostring(kind))
    end

    local built
    if kind == "llm" then
        built = LLMClassifier:new(test_state.classifier_config)
    else
        built = FuzzyMatchClassifier:new(test_state.classifier_config)
    end
    test_state.classifier = built
end)
|
|
100
|
+
|
|
101
|
+
-- When-step: classify the quoted text and store the outcome in
-- test_state.result. If no classifier has been created yet, one is built
-- first: an LLMClassifier when the stored type is "llm", a
-- FuzzyMatchClassifier for anything else.
Step("I classify \"(.+)\"", function(ctx, text)
    local active = test_state.classifier

    if not active then
        local settings = test_state.classifier_config
        if test_state.classifier_type == "llm" then
            active = LLMClassifier:new(settings)
        else
            active = FuzzyMatchClassifier:new(settings)
        end
        test_state.classifier = active
    end

    test_state.result = active:classify(text)
end)
|
|
111
|
+
|
|
112
|
+
-- Then-step: fail unless a result exists and its `value` equals the quoted
-- expectation.
Step("the result value should be \"(.+)\"", function(ctx, expected)
    local outcome = test_state.result
    if not outcome then
        -- Level 0: same position-free message assert() would raise.
        error("No classification result found", 0)
    end

    local actual = outcome.value
    if actual ~= expected then
        error("Expected '" .. expected .. "' but got '" .. tostring(actual) .. "'", 0)
    end
end)
|
|
117
|
+
|
|
118
|
+
-- Then-step: fail unless the result carries a numeric `confidence` within
-- the closed range 0.0 .. 1.0.
Step("the result should have a confidence score", function(ctx)
    local outcome = test_state.result
    if not outcome then
        -- Level 0: same position-free messages assert() would raise.
        error("No classification result found", 0)
    end

    local score = outcome.confidence
    if score == nil then
        error("Expected confidence score but got nil", 0)
    end

    local score_type = type(score)
    if score_type ~= "number" then
        error("Confidence should be a number, got " .. score_type, 0)
    end

    -- Written as a negated range test so that NaN is rejected as well.
    if not (score >= 0.0 and score <= 1.0) then
        error("Confidence should be between 0 and 1, got " .. tostring(score), 0)
    end
end)
|
|
127
|
+
|
|
128
|
+
-- Then-step: fail unless a result exists and its `matched_text` equals the
-- quoted expectation.
Step("the matched_text should be \"(.+)\"", function(ctx, expected)
    local outcome = test_state.result
    if not outcome then
        -- Level 0: same position-free message assert() would raise.
        error("No classification result found", 0)
    end

    local actual = outcome.matched_text
    if actual ~= expected then
        error("Expected matched_text '" .. expected .. "' but got '" .. tostring(actual) .. "'", 0)
    end
end)
|
|
133
|
+
|
|
134
|
+
-- BDD Specifications
|
|
135
|
+
Specification([[
|
|
136
|
+
Feature: Classification Class Hierarchy
|
|
137
|
+
As a Tactus developer
|
|
138
|
+
I want to use proper OOP classifiers
|
|
139
|
+
So that I can extend and compose classification behavior
|
|
140
|
+
|
|
141
|
+
Scenario: LLM binary classification
|
|
142
|
+
Given an LLM classifier with classes "Yes" and "No"
|
|
143
|
+
And prompt "Is this a question?"
|
|
144
|
+
When I classify "How are you?"
|
|
145
|
+
Then the result value should be "Yes"
|
|
146
|
+
And the result should have a confidence score
|
|
147
|
+
|
|
148
|
+
Scenario: LLM multi-class classification
|
|
149
|
+
Given an LLM classifier with classes "positive", "negative", and "neutral"
|
|
150
|
+
And prompt "What is the sentiment?"
|
|
151
|
+
When I classify "I love this product!"
|
|
152
|
+
Then the result value should be "positive"
|
|
153
|
+
|
|
154
|
+
Scenario: LLM negative sentiment
|
|
155
|
+
Given an LLM classifier with classes "positive", "negative", and "neutral"
|
|
156
|
+
And prompt "What is the sentiment?"
|
|
157
|
+
When I classify "This is terrible"
|
|
158
|
+
Then the result value should be "negative"
|
|
159
|
+
|
|
160
|
+
Scenario: LLM neutral sentiment
|
|
161
|
+
Given an LLM classifier with classes "positive", "negative", and "neutral"
|
|
162
|
+
And prompt "What is the sentiment?"
|
|
163
|
+
When I classify "The sky is blue"
|
|
164
|
+
Then the result value should be "neutral"
|
|
165
|
+
|
|
166
|
+
Scenario: Fuzzy match with typo
|
|
167
|
+
Given a fuzzy classifier expecting "hello"
|
|
168
|
+
When I classify "helo"
|
|
169
|
+
Then the result value should be "Yes"
|
|
170
|
+
And the matched_text should be "hello"
|
|
171
|
+
|
|
172
|
+
Scenario: Fuzzy match exact
|
|
173
|
+
Given a fuzzy classifier expecting "hello"
|
|
174
|
+
When I classify "hello"
|
|
175
|
+
Then the result value should be "Yes"
|
|
176
|
+
|
|
177
|
+
Scenario: Fuzzy match failure
|
|
178
|
+
Given a fuzzy classifier expecting "hello"
|
|
179
|
+
When I classify "goodbye"
|
|
180
|
+
Then the result value should be "No"
|
|
181
|
+
]])
|
|
182
|
+
|
|
183
|
+
-- Minimal procedure
|
|
184
|
+
Procedure {
|
|
185
|
+
output = {
|
|
186
|
+
result = field.string{required = true}
|
|
187
|
+
},
|
|
188
|
+
function(input)
|
|
189
|
+
return {result = "Classification class hierarchy specs executed"}
|
|
190
|
+
end
|
|
191
|
+
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
-- Base Extraction Class
|
|
2
|
+
--
|
|
3
|
+
-- This module provides the foundation for all extractors:
|
|
4
|
+
-- - class() helper for Lua OOP with inheritance
|
|
5
|
+
-- - BaseExtractor abstract base class
|
|
6
|
+
|
|
7
|
+
-- Simple class system for Lua (shared with classify module pattern)
|
|
8
|
+
--- Create a new class table, optionally derived from `base`.
--
-- Inheritance is by copying: every field of `base` (methods and metamethods
-- alike) is copied into the new class, and `base` itself is kept in `_base`.
-- The class is its own `__index`, so instances created with `new` look their
-- methods up on it.
--
-- @param base table|nil parent class to copy from
-- @return table the new class, with a `new(config)` constructor
local function class(base)
    local cls = {}

    if base then
        for key, member in pairs(base) do
            cls[key] = member
        end
        cls._base = base
    end

    -- Assigned after the copy so the parent's __index and new are replaced.
    cls.__index = cls

    -- Constructor: makes an instance and runs `init(config)` when one exists.
    cls.new = function(self, config)
        local object = setmetatable({}, self)
        if object.init then
            object:init(config)
        end
        return object
    end

    return cls
end
|
|
28
|
+
|
|
29
|
+
-- ============================================================================
|
|
30
|
+
-- BaseExtractor (Abstract Base Class)
|
|
31
|
+
-- ============================================================================
|
|
32
|
+
|
|
33
|
+
local BaseExtractor = class()
|
|
34
|
+
|
|
35
|
+
--- Initialise the settings shared by all extractors.
--
-- @param config table|nil optional settings:
--   fields      schema table; default {}
--   prompt      extraction instruction; default ""
--   strict      strict mode; on unless explicitly set to false
--   max_retries default 3
function BaseExtractor:init(config)
    -- Normalise once so a missing config does not crash on the field reads
    -- below; guarding only the `self.config` assignment is not enough.
    config = config or {}

    self.config = config
    self.fields = config.fields or {}
    self.prompt = config.prompt or ""
    self.strict = config.strict ~= false -- Default to strict mode
    self.max_retries = config.max_retries or 3
end
|
|
42
|
+
|
|
43
|
+
--- Abstract extraction entry point.
-- This base version always raises; a subclass is expected to replace it.
-- @param text input passed by the caller (unused here)
BaseExtractor.extract = function(self, text)
    error("BaseExtractor.extract() must be implemented by subclass")
end
|
|
46
|
+
|
|
47
|
+
--- Call metamethod: calling an extractor instance forwards to `extract`,
-- so `extractor(text)` returns whatever `extractor:extract(text)` returns.
BaseExtractor.__call = function(self, text)
    return self:extract(text)
end
|
|
50
|
+
|
|
51
|
+
-- Helper to validate extracted fields against schema
|
|
52
|
+
--- Validate extracted values against a schema.
--
-- Each schema entry is looked up in `extracted`. Present values are passed to
-- `validate_field`; its first return value is stored in the result and any
-- error message is collected. Missing values are left out of the result and
-- are reported as an error only when `self.strict` is set.
--
-- @param extracted table field name -> raw value
-- @param schema    table field name -> type name
-- @return table validated values, table list of error messages
--         (schema is walked with pairs, so message order is unspecified)
function BaseExtractor:validate_fields(extracted, schema)
    local problems = {}
    local clean = {}

    for name, wanted_type in pairs(schema) do
        local raw = extracted[name]

        if raw ~= nil then
            local coerced, err = self:validate_field(name, raw, wanted_type)
            if err then
                problems[#problems + 1] = err
            end
            clean[name] = coerced
        elseif self.strict then
            problems[#problems + 1] = "Missing required field: " .. name
        end
    end

    return clean, problems
end
|
|
75
|
+
|
|
76
|
+
-- Validate a single field value against its type
|
|
77
|
+
-- Coercion routines keyed by lower-cased type name. Each takes
-- (field_name, value) and returns the coerced value plus nil, or nil plus an
-- error message.
local FIELD_COERCERS = {}

-- Anything can be rendered as a string.
FIELD_COERCERS["string"] = function(_, value)
    return tostring(value), nil
end

FIELD_COERCERS["number"] = function(field_name, value)
    local parsed = tonumber(value)
    if not parsed then
        return nil, "Field '" .. field_name .. "' must be a number"
    end
    return parsed, nil
end

-- Numeric input is rounded down to a whole number.
FIELD_COERCERS["integer"] = function(field_name, value)
    local parsed = tonumber(value)
    if not parsed then
        return nil, "Field '" .. field_name .. "' must be an integer"
    end
    return math.floor(parsed), nil
end

-- Accepts real booleans and the strings true/yes/1 and false/no/0
-- (case-insensitive).
FIELD_COERCERS["boolean"] = function(field_name, value)
    local kind = type(value)
    if kind == "boolean" then
        return value, nil
    end
    if kind == "string" then
        local word = string.lower(value)
        if word == "true" or word == "yes" or word == "1" then
            return true, nil
        end
        if word == "false" or word == "no" or word == "0" then
            return false, nil
        end
    end
    return nil, "Field '" .. field_name .. "' must be a boolean"
end

-- Lists and objects are both plain tables; only the error wording differs.
local function coerce_list(field_name, value)
    if type(value) ~= "table" then
        return nil, "Field '" .. field_name .. "' must be a list"
    end
    return value, nil
end
FIELD_COERCERS["list"] = coerce_list
FIELD_COERCERS["array"] = coerce_list

local function coerce_object(field_name, value)
    if type(value) ~= "table" then
        return nil, "Field '" .. field_name .. "' must be an object"
    end
    return value, nil
end
FIELD_COERCERS["object"] = coerce_object
FIELD_COERCERS["dict"] = coerce_object

--- Validate (and where possible coerce) one value against a type name.
--
-- @param field_name string name used in error messages
-- @param value      raw value; nil is passed through untouched
-- @param field_type string type name, matched case-insensitively:
--                   string, number, integer, boolean, list/array,
--                   object/dict; any other name accepts the value as-is
-- @return coerced value (nil on failure), error message (nil on success)
function BaseExtractor:validate_field(field_name, value, field_type)
    if value == nil then
        return nil, nil
    end

    local coerce = FIELD_COERCERS[string.lower(field_type)]
    if coerce == nil then
        -- Unknown type, accept any value
        return value, nil
    end

    return coerce(field_name, value)
end
|
|
133
|
+
|
|
134
|
+
-- Export classes and helpers
|
|
135
|
+
return {
|
|
136
|
+
class = class,
|
|
137
|
+
BaseExtractor = BaseExtractor,
|
|
138
|
+
}
|