tactus 0.37.0__py3-none-any.whl → 0.38.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tactus/__init__.py +1 -1
- tactus/adapters/channels/base.py +2 -0
- tactus/cli/app.py +212 -57
- tactus/core/compaction.py +17 -0
- tactus/core/context_assembler.py +73 -0
- tactus/core/context_models.py +41 -0
- tactus/core/dsl_stubs.py +557 -17
- tactus/core/exceptions.py +8 -0
- tactus/core/execution_context.py +1 -1
- tactus/core/mocking.py +12 -0
- tactus/core/registry.py +142 -0
- tactus/core/retrieval.py +317 -0
- tactus/core/retriever_tasks.py +30 -0
- tactus/core/runtime.py +388 -74
- tactus/dspy/agent.py +143 -82
- tactus/dspy/config.py +16 -0
- tactus/dspy/module.py +12 -1
- tactus/ide/coding_assistant.py +2 -2
- tactus/primitives/handles.py +79 -7
- tactus/sandbox/config.py +1 -1
- tactus/sandbox/container_runner.py +2 -0
- tactus/sandbox/entrypoint.py +51 -8
- tactus/sandbox/protocol.py +5 -0
- tactus/stdlib/README.md +10 -1
- tactus/stdlib/biblicus/__init__.py +3 -0
- tactus/stdlib/biblicus/text.py +189 -0
- tactus/stdlib/tac/biblicus/text.tac +32 -0
- tactus/stdlib/tac/tactus/biblicus.spec.tac +179 -0
- tactus/stdlib/tac/tactus/corpora/base.tac +42 -0
- tactus/stdlib/tac/tactus/corpora/filesystem.tac +5 -0
- tactus/stdlib/tac/tactus/retrievers/base.tac +37 -0
- tactus/stdlib/tac/tactus/retrievers/embedding_index_file.tac +6 -0
- tactus/stdlib/tac/tactus/retrievers/embedding_index_inmemory.tac +6 -0
- tactus/stdlib/tac/tactus/retrievers/index.md +137 -0
- tactus/stdlib/tac/tactus/retrievers/init.tac +11 -0
- tactus/stdlib/tac/tactus/retrievers/sqlite_full_text_search.tac +6 -0
- tactus/stdlib/tac/tactus/retrievers/tf_vector.tac +6 -0
- tactus/testing/behave_integration.py +2 -0
- tactus/testing/context.py +4 -0
- tactus/validation/semantic_visitor.py +357 -6
- tactus/validation/validator.py +142 -2
- {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/METADATA +3 -2
- {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/RECORD +46 -28
- {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/WHEEL +0 -0
- {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/entry_points.txt +0 -0
- {tactus-0.37.0.dist-info → tactus-0.38.0.dist-info}/licenses/LICENSE +0 -0
tactus/sandbox/entrypoint.py
CHANGED
|
@@ -86,6 +86,7 @@ async def execute_procedure(
|
|
|
86
86
|
source_file_path: Optional[str] = None,
|
|
87
87
|
format: str = "lua",
|
|
88
88
|
run_id: Optional[str] = None,
|
|
89
|
+
task_name: Optional[str] = None,
|
|
89
90
|
) -> Any:
|
|
90
91
|
"""
|
|
91
92
|
Execute a procedure using TactusRuntime.
|
|
@@ -163,6 +164,7 @@ async def execute_procedure(
|
|
|
163
164
|
source=source,
|
|
164
165
|
context=params,
|
|
165
166
|
format=format,
|
|
167
|
+
task_name=task_name,
|
|
166
168
|
)
|
|
167
169
|
|
|
168
170
|
# CRITICAL: Flush pending log events before returning
|
|
@@ -181,6 +183,7 @@ async def main_async() -> int:
|
|
|
181
183
|
from tactus.sandbox.protocol import (
|
|
182
184
|
ExecutionRequest,
|
|
183
185
|
ExecutionResult,
|
|
186
|
+
ExecutionStatus,
|
|
184
187
|
)
|
|
185
188
|
|
|
186
189
|
start_time = time.time()
|
|
@@ -200,14 +203,54 @@ async def main_async() -> int:
|
|
|
200
203
|
request = ExecutionRequest(**request_data)
|
|
201
204
|
logger.info("Executing procedure (id=%s)", request.execution_id)
|
|
202
205
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
206
|
+
try:
|
|
207
|
+
# Execute procedure
|
|
208
|
+
proc_result = await execute_procedure(
|
|
209
|
+
source=request.source,
|
|
210
|
+
params=request.params,
|
|
211
|
+
source_file_path=request.source_file_path,
|
|
212
|
+
format=request.format,
|
|
213
|
+
run_id=request.run_id,
|
|
214
|
+
task_name=request.task_name,
|
|
215
|
+
)
|
|
216
|
+
except Exception as exc:
|
|
217
|
+
# Provide structured results for common control-flow exceptions.
|
|
218
|
+
from tactus.core.exceptions import ProcedureWaitingForHuman, TaskSelectionRequired
|
|
219
|
+
|
|
220
|
+
duration = time.time() - start_time
|
|
221
|
+
|
|
222
|
+
if isinstance(exc, TaskSelectionRequired):
|
|
223
|
+
result = ExecutionResult.failure(
|
|
224
|
+
error=str(exc),
|
|
225
|
+
error_type=type(exc).__name__,
|
|
226
|
+
traceback=None,
|
|
227
|
+
duration_seconds=duration,
|
|
228
|
+
exit_code=2,
|
|
229
|
+
metadata={"tasks": list(exc.tasks)},
|
|
230
|
+
)
|
|
231
|
+
write_result_to_stdout(result)
|
|
232
|
+
return 2
|
|
233
|
+
|
|
234
|
+
if isinstance(exc, ProcedureWaitingForHuman):
|
|
235
|
+
result = ExecutionResult(
|
|
236
|
+
status=ExecutionStatus.CANCELLED,
|
|
237
|
+
result=None,
|
|
238
|
+
error=str(exc),
|
|
239
|
+
error_type=type(exc).__name__,
|
|
240
|
+
traceback=None,
|
|
241
|
+
duration_seconds=duration,
|
|
242
|
+
exit_code=0,
|
|
243
|
+
logs=[],
|
|
244
|
+
metadata={
|
|
245
|
+
"waiting_for_human": True,
|
|
246
|
+
"procedure_id": exc.procedure_id,
|
|
247
|
+
"pending_message_id": exc.pending_message_id,
|
|
248
|
+
},
|
|
249
|
+
)
|
|
250
|
+
write_result_to_stdout(result)
|
|
251
|
+
return 0
|
|
252
|
+
|
|
253
|
+
raise
|
|
211
254
|
|
|
212
255
|
# Create success result
|
|
213
256
|
duration = time.time() - start_time
|
tactus/sandbox/protocol.py
CHANGED
|
@@ -60,6 +60,9 @@ class ExecutionRequest:
|
|
|
60
60
|
# Source format: "lua" for .tac files, "yaml" for legacy YAML format
|
|
61
61
|
format: str = "lua"
|
|
62
62
|
|
|
63
|
+
# Optional task name to execute
|
|
64
|
+
task_name: Optional[str] = None
|
|
65
|
+
|
|
63
66
|
def to_json(self) -> str:
|
|
64
67
|
"""Serialize to JSON string."""
|
|
65
68
|
return json.dumps(asdict(self), indent=None, separators=(",", ":"))
|
|
@@ -148,6 +151,7 @@ class ExecutionResult:
|
|
|
148
151
|
duration_seconds: float = 0.0,
|
|
149
152
|
exit_code: int = 1,
|
|
150
153
|
logs: Optional[list[dict[str, Any]]] = None,
|
|
154
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
151
155
|
) -> "ExecutionResult":
|
|
152
156
|
"""Create a failed result."""
|
|
153
157
|
return cls(
|
|
@@ -158,6 +162,7 @@ class ExecutionResult:
|
|
|
158
162
|
duration_seconds=duration_seconds,
|
|
159
163
|
exit_code=exit_code,
|
|
160
164
|
logs=logs or [],
|
|
165
|
+
metadata=metadata or {},
|
|
161
166
|
)
|
|
162
167
|
|
|
163
168
|
@classmethod
|
tactus/stdlib/README.md
CHANGED
|
@@ -22,6 +22,15 @@ tactus/stdlib/
|
|
|
22
22
|
│ └── fuzzy.py # Fuzzy string matching
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
+
## Available Modules
|
|
26
|
+
|
|
27
|
+
- `tactus.classify` - LLM and fuzzy classification
|
|
28
|
+
- `tactus.extract` - Structured extraction utilities
|
|
29
|
+
- `tactus.generate` - LLM-based generation helpers
|
|
30
|
+
- `tactus.io.*` - File I/O helpers (json, csv, tsv, file)
|
|
31
|
+
- `biblicus.text` - Biblicus-backed text utilities
|
|
32
|
+
- `tactus.retrievers.*` - Biblicus-backed retrievers
|
|
33
|
+
|
|
25
34
|
## Testing
|
|
26
35
|
|
|
27
36
|
Run all stdlib specs:
|
|
@@ -54,7 +63,7 @@ The Classify primitive demonstrates the stdlib pattern:
|
|
|
54
63
|
**Current Status**:
|
|
55
64
|
- ✅ Specs pass with Python implementation
|
|
56
65
|
- ✅ Tactus reference implementation exists
|
|
57
|
-
-
|
|
66
|
+
- Next: a module loading system is needed before the Tactus implementation can be used
|
|
58
67
|
|
|
59
68
|
## Adding New Primitives
|
|
60
69
|
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""
|
|
2
|
+
tactus.biblicus.text - Biblicus text utilities for Tactus.
|
|
3
|
+
|
|
4
|
+
This module exposes Biblicus text helpers to Lua via the stdlib loader.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from typing import Any, Dict, List
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _require_biblicus_text() -> Dict[str, Any]:
    """Import the Biblicus text API lazily and return it as a name-to-object map.

    The imports are performed inside the function, so merely importing this
    module does not require Biblicus to be installed.

    Returns:
        A dict keyed by imported name (``"LlmClientConfig"``,
        ``"apply_text_extract"``, ``"TextExtractRequest"``, ...) whose values
        are the corresponding Biblicus objects.

    Raises:
        ValueError: If Biblicus, or any of the ``biblicus.text`` names used
            here, cannot be imported. The underlying import error is chained
            as ``__cause__``.
    """
    try:
        from biblicus.ai.models import LlmClientConfig
        from biblicus.text import (
            apply_text_annotate,
            apply_text_extract,
            apply_text_link,
            apply_text_redact,
            apply_text_slice,
        )
        from biblicus.text.markup import (
            parse_span_markup,
            strip_span_tags,
            summarize_span_context,
        )
        from biblicus.text.models import (
            TextAnnotateRequest,
            TextExtractRequest,
            TextLinkRequest,
            TextRedactRequest,
            TextSliceRequest,
        )
    except ImportError as exc:
        # ImportError (the parent of ModuleNotFoundError) is caught so that an
        # installed Biblicus build that lacks one of these names produces the
        # same actionable message as a missing package.
        raise ValueError(
            "Biblicus text utilities are unavailable. Install a Biblicus build "
            "that includes biblicus.text to use the text stdlib."
        ) from exc

    return {
        "LlmClientConfig": LlmClientConfig,
        "apply_text_annotate": apply_text_annotate,
        "apply_text_extract": apply_text_extract,
        "apply_text_link": apply_text_link,
        "apply_text_redact": apply_text_redact,
        "apply_text_slice": apply_text_slice,
        "parse_span_markup": parse_span_markup,
        "strip_span_tags": strip_span_tags,
        "summarize_span_context": summarize_span_context,
        "TextAnnotateRequest": TextAnnotateRequest,
        "TextExtractRequest": TextExtractRequest,
        "TextLinkRequest": TextLinkRequest,
        "TextRedactRequest": TextRedactRequest,
        "TextSliceRequest": TextSliceRequest,
    }
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _normalize_client_config(client: Any) -> Any:
    """Validate a client table and build a Biblicus ``LlmClientConfig`` from it.

    The model may be written with a ``"provider/"`` prefix. When no explicit
    provider is given, the prefix becomes the provider; when the explicit
    provider matches the prefix, the prefix is removed from the model.

    Args:
        client: Mapping with a ``model`` entry and, unless the model carries a
            provider prefix, a ``provider`` entry. All other entries are
            passed through unchanged.

    Returns:
        The object produced by calling ``LlmClientConfig(**payload)`` with the
        normalized ``provider`` and ``model``.

    Raises:
        ValueError: If ``client`` is not a dict, if the model is missing or is
            empty once the provider prefix is removed, or if no provider can
            be determined.
    """
    if not isinstance(client, dict):
        raise ValueError("client must be a table with provider and model")

    payload = dict(client)  # work on a copy; the caller's table is not mutated
    model = payload.get("model")
    provider = payload.get("provider")
    if not model:
        raise ValueError("client.model is required")

    if isinstance(model, str):
        if provider is None and "/" in model:
            prefix, _, model = model.partition("/")
            # "/name" has an empty prefix, which is not a usable provider.
            provider = prefix or None
        elif provider is not None:
            provider_prefix = f"{provider}/"
            if model.startswith(provider_prefix):
                # Slice by the prefix length rather than splitting on the first
                # "/", so a provider that itself contains "/" is fully removed.
                model = model[len(provider_prefix):]

    if provider is None:
        raise ValueError("client.provider is required when model lacks a provider prefix")
    if not model:
        # The prefix was the whole string (e.g. "openai/"); nothing is left to
        # use as the model name.
        raise ValueError("client.model is required")

    payload["provider"] = provider
    payload["model"] = model

    biblicus = _require_biblicus_text()
    return biblicus["LlmClientConfig"](**payload)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _prepare_request(request: Dict[str, Any]) -> Dict[str, Any]:
    """Copy a request table and replace its ``client`` with a normalized config.

    When the ``TACTUS_MOCK_MODE`` environment variable is exactly ``"0"``, the
    ``mock_marked_up_text`` entry is dropped from the copy.

    Args:
        request: Request table; must contain a non-``None`` ``client`` entry.

    Returns:
        A new dict with the same entries as ``request`` except that ``client``
        holds the result of ``_normalize_client_config``.

    Raises:
        ValueError: If ``request`` is not a dict or has no ``client``.
    """
    if not isinstance(request, dict):
        raise ValueError("request must be a table")

    prepared = {**request}
    if os.environ.get("TACTUS_MOCK_MODE") == "0":
        prepared.pop("mock_marked_up_text", None)

    raw_client = prepared.get("client")
    if raw_client is None:
        raise ValueError("client is required")

    prepared["client"] = _normalize_client_config(raw_client)
    return prepared
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _maybe_mock(tool_name: str, payload: Dict[str, Any]) -> Dict[str, Any] | None:
    """Return a mocked response for ``tool_name`` when the mock manager has one.

    Args:
        tool_name: Name under which the call is looked up and recorded.
        payload: Request payload handed to the mock manager.

    Returns:
        The mock response, or ``None`` when the mocking module cannot be
        imported, no mock manager is active, or the manager has no response
        for this call.
    """
    try:
        from tactus.core.mocking import get_current_mock_manager
    except ImportError:
        # Mocking support is optional, so a failed import means "no mock".
        # Only import failures are treated this way; any other error raised
        # while loading the mocking module is allowed to surface instead of
        # silently disabling mocks.
        return None

    manager = get_current_mock_manager()
    if manager is None:
        return None

    response = manager.get_mock_response(tool_name, payload)
    if response is not None:
        # Only calls that were actually served by a mock are recorded.
        manager.record_call(tool_name, payload, response)
    return response
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def extract(request: Dict[str, Any]) -> Dict[str, Any]:
    """Run Biblicus text extraction, or return a mock response if one exists.

    Args:
        request: Request table; validated and normalized by ``_prepare_request``.

    Returns:
        The mock response when ``_maybe_mock`` supplies one, otherwise the
        ``model_dump()`` of the ``apply_text_extract`` result.
    """
    prepared = _prepare_request(request)
    mocked = _maybe_mock("biblicus.text.extract", prepared)
    if mocked is None:
        api = _require_biblicus_text()
        run = api["apply_text_extract"]
        typed_request = api["TextExtractRequest"](**prepared)
        return run(typed_request).model_dump()
    return mocked
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def slice(request: Dict[str, Any]) -> Dict[str, Any]:
    """Run Biblicus text slicing, or return a mock response if one exists.

    Note that this public name shadows the ``slice`` builtin within the module.

    Args:
        request: Request table; validated and normalized by ``_prepare_request``.

    Returns:
        The mock response when ``_maybe_mock`` supplies one, otherwise the
        ``model_dump()`` of the ``apply_text_slice`` result.
    """
    prepared = _prepare_request(request)
    mocked = _maybe_mock("biblicus.text.slice", prepared)
    if mocked is None:
        api = _require_biblicus_text()
        run = api["apply_text_slice"]
        typed_request = api["TextSliceRequest"](**prepared)
        return run(typed_request).model_dump()
    return mocked
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def annotate(request: Dict[str, Any]) -> Dict[str, Any]:
    """Run Biblicus text annotation, or return a mock response if one exists.

    Args:
        request: Request table; validated and normalized by ``_prepare_request``.

    Returns:
        The mock response when ``_maybe_mock`` supplies one, otherwise the
        ``model_dump()`` of the ``apply_text_annotate`` result.
    """
    prepared = _prepare_request(request)
    mocked = _maybe_mock("biblicus.text.annotate", prepared)
    if mocked is None:
        api = _require_biblicus_text()
        run = api["apply_text_annotate"]
        typed_request = api["TextAnnotateRequest"](**prepared)
        return run(typed_request).model_dump()
    return mocked
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def redact(request: Dict[str, Any]) -> Dict[str, Any]:
    """Run Biblicus text redaction, or return a mock response if one exists.

    Args:
        request: Request table; validated and normalized by ``_prepare_request``.

    Returns:
        The mock response when ``_maybe_mock`` supplies one, otherwise the
        ``model_dump()`` of the ``apply_text_redact`` result.
    """
    prepared = _prepare_request(request)
    mocked = _maybe_mock("biblicus.text.redact", prepared)
    if mocked is None:
        api = _require_biblicus_text()
        run = api["apply_text_redact"]
        typed_request = api["TextRedactRequest"](**prepared)
        return run(typed_request).model_dump()
    return mocked
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def link(request: Dict[str, Any]) -> Dict[str, Any]:
    """Run Biblicus text linking, or return a mock response if one exists.

    Args:
        request: Request table; validated and normalized by ``_prepare_request``.

    Returns:
        The mock response when ``_maybe_mock`` supplies one, otherwise the
        ``model_dump()`` of the ``apply_text_link`` result.
    """
    prepared = _prepare_request(request)
    mocked = _maybe_mock("biblicus.text.link", prepared)
    if mocked is None:
        api = _require_biblicus_text()
        run = api["apply_text_link"]
        typed_request = api["TextLinkRequest"](**prepared)
        return run(typed_request).model_dump()
    return mocked
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def strip_span_tags(marked_up_text: str) -> str:
    """Pass ``marked_up_text`` to Biblicus ``strip_span_tags`` and return its result."""
    strip = _require_biblicus_text()["strip_span_tags"]
    return strip(marked_up_text)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def parse_span_markup(marked_up_text: str) -> List[Dict[str, Any]]:
    """Parse span markup with Biblicus and return each span as a plain dict.

    Each span returned by Biblicus ``parse_span_markup`` is converted with its
    ``model_dump()`` method; the order of the spans is kept.
    """
    parse = _require_biblicus_text()["parse_span_markup"]
    dumped = []
    for span in parse(marked_up_text):
        dumped.append(span.model_dump())
    return dumped
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def summarize_span_context(marked_up_text: str, span_indices: List[int]) -> List[str]:
    """Pass both arguments to Biblicus ``summarize_span_context`` and return its result."""
    summarize = _require_biblicus_text()["summarize_span_context"]
    return summarize(marked_up_text, span_indices)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# Names of the public functions defined above.
# NOTE(review): presumably read by the stdlib loader to decide what is exposed
# to Lua; confirm against the loader.
__tactus_exports__ = [
    # Request-based helpers (go through _prepare_request / _maybe_mock).
    "extract", "slice", "annotate", "redact", "link",
    # Markup helpers (operate directly on marked-up text).
    "strip_span_tags", "parse_span_markup", "summarize_span_context",
]
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
--[[doc
|
|
2
|
+
# biblicus.text
|
|
3
|
+
|
|
4
|
+
Biblicus-powered text utilities for Tactus.
|
|
5
|
+
|
|
6
|
+
This module exposes Biblicus text helpers through the Tactus stdlib. It mirrors
|
|
7
|
+
the Biblicus request/response shapes while keeping configuration in Tactus.
|
|
8
|
+
|
|
9
|
+
## Usage
|
|
10
|
+
|
|
11
|
+
```lua
|
|
12
|
+
local text = require("biblicus.text")
|
|
13
|
+
|
|
14
|
+
local result = text.extract({
|
|
15
|
+
text = "Alice met Bob in Paris.",
|
|
16
|
+
client = {
|
|
17
|
+
provider = "openai",
|
|
18
|
+
model = "gpt-4o-mini"
|
|
19
|
+
},
|
|
20
|
+
prompt_template = "Extract the person names in the text.",
|
|
21
|
+
})
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Testing hook
|
|
25
|
+
|
|
26
|
+
For deterministic tests, use Tactus `Mocks {}` to return stable Biblicus
|
|
27
|
+
results without touching the model.
|
|
28
|
+
]]
|
|
29
|
+
|
|
30
|
+
-- Load the `tactus.biblicus.text` module and hand it back unchanged as this
-- module's value.
local biblicus_text = require("tactus.biblicus.text")

return biblicus_text
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
--[[doc
|
|
2
|
+
# Biblicus Text Utilities
|
|
3
|
+
|
|
4
|
+
This specification covers the Biblicus-backed `biblicus.text` stdlib module.
|
|
5
|
+
It exercises the deterministic mock path plus markup helper functions.
|
|
6
|
+
]]
|
|
7
|
+
|
|
8
|
+
local text = require("biblicus.text")
|
|
9
|
+
|
|
10
|
+
-- Scratch table shared by the step handlers in this file.
local test_state = {}

-- Build a request table for `text_value` with a fixed client and prompt.
local function build_request(text_value)
  local request = {
    prompt_template = "Return only the updated markup.",
    client = { provider = "openai", model = "gpt-4o-mini" },
  }
  request.text = text_value
  return request
end
|
|
22
|
+
|
|
23
|
+
-- Call `text[helper_name]` on the pending request with `markup` set as
-- `mock_marked_up_text` (plus any `extra_fields`), fail the step if the call
-- raises, and store the returned value in `test_state.result`.
local function apply_mock_markup(helper_name, markup, extra_fields)
  local pending = test_state.request
  pending.mock_marked_up_text = markup
  for key, value in pairs(extra_fields or {}) do
    pending[key] = value
  end
  local succeeded, outcome = pcall(text[helper_name], pending)
  assert(succeeded, "text." .. helper_name .. " failed: " .. tostring(outcome))
  test_state.result = outcome
end

-- Format the "Expected <what> '<expected>' but got '<actual>'" message.
local function mismatch(what, expected, actual)
  return "Expected " .. what .. " '" .. expected .. "' but got '" .. tostring(actual) .. "'"
end

-- Request setup ---------------------------------------------------------------

Step("a biblicus request for \"(.+)\"", function(ctx, raw_text)
  test_state.request = build_request(raw_text)
end)

-- Extraction ------------------------------------------------------------------

Step("I apply mock extraction markup \"(.+)\"", function(ctx, markup)
  apply_mock_markup("extract", markup)
end)

Step("the extracted span should be \"(.+)\"", function(ctx, expected)
  local found = test_state.result.spans or {}
  assert(#found == 1, "Expected one span")
  local actual = found[1].text
  assert(actual == expected, mismatch("span text", expected, actual))
end)

-- Annotation ------------------------------------------------------------------

Step("I apply mock annotation markup \"(.+)\"", function(ctx, markup)
  apply_mock_markup("annotate", markup)
end)

Step("the annotation span label should be \"(.+)\"", function(ctx, expected)
  local found = test_state.result.spans or {}
  assert(#found == 1, "Expected one span")
  local actual = found[1].attributes.label
  assert(actual == expected, mismatch("label", expected, actual))
end)

-- Linking ---------------------------------------------------------------------

Step("I apply mock link markup \"(.+)\"", function(ctx, markup)
  apply_mock_markup("link", markup, { id_prefix = "link_" })
end)

Step("the link spans should include ids", function(ctx)
  local found = test_state.result.spans or {}
  assert(#found == 2, "Expected two spans")
  local first_attrs = found[1].attributes
  assert(first_attrs.id == "link_1", "Expected first span id link_1")
  local second_attrs = found[2].attributes
  assert(second_attrs.ref == "link_1", "Expected second span ref link_1")
end)

-- Redaction -------------------------------------------------------------------

Step("I apply mock redaction markup \"(.+)\"", function(ctx, markup)
  apply_mock_markup("redact", markup)
end)

Step("the redaction should return one span", function(ctx)
  local found = test_state.result.spans or {}
  assert(#found == 1, "Expected one redaction span")
end)

-- Slicing ---------------------------------------------------------------------

Step("I apply mock slice markup \"(.+)\"", function(ctx, markup)
  apply_mock_markup("slice", markup)
end)

Step("the slices should be \"(.+)\" and \"(.+)\"", function(ctx, first, second)
  local pieces = test_state.result.slices or {}
  assert(#pieces == 2, "Expected two slices")
  assert(pieces[1].text == first, "Expected first slice '" .. first .. "'")
  assert(pieces[2].text == second, "Expected second slice '" .. second .. "'")
end)

-- Markup helpers --------------------------------------------------------------

Step("I strip span tags from \"(.+)\"", function(ctx, markup)
  test_state.cleaned = text.strip_span_tags(markup)
end)

Step("the stripped text should be \"(.+)\"", function(ctx, expected)
  local actual = test_state.cleaned
  assert(actual == expected, mismatch("stripped text", expected, actual))
end)

Step("I parse spans from \"(.+)\"", function(ctx, markup)
  test_state.parsed_spans = text.parse_span_markup(markup)
end)

Step("the parsed span text should be \"(.+)\"", function(ctx, expected)
  local found = test_state.parsed_spans or {}
  assert(#found == 1, "Expected one parsed span")
  local actual = found[1].text
  assert(actual == expected, mismatch("span text", expected, actual))
end)

Step("I summarize span 1 from \"(.+)\"", function(ctx, markup)
  local summaries = text.summarize_span_context(markup, {1})
  test_state.summary = summaries[1]
end)

Step("the summary should be \"(.+)\"", function(ctx, expected)
  local actual = test_state.summary
  assert(actual == expected, mismatch("summary", expected, actual))
end)
|
|
130
|
+
|
|
131
|
+
Specification([[
|
|
132
|
+
Feature: Biblicus Text Utilities
|
|
133
|
+
As a Tactus developer
|
|
134
|
+
I want to access Biblicus text utilities from the stdlib
|
|
135
|
+
So that I can reuse Biblicus text processing in workflows
|
|
136
|
+
|
|
137
|
+
Scenario: Extract spans from mock markup
|
|
138
|
+
Given a biblicus request for "Alice met Bob."
|
|
139
|
+
When I apply mock extraction markup "Alice met <span>Bob</span>."
|
|
140
|
+
Then the extracted span should be "Bob"
|
|
141
|
+
|
|
142
|
+
Scenario: Annotate spans with attributes
|
|
143
|
+
Given a biblicus request for "Ada wrote code."
|
|
144
|
+
When I apply mock annotation markup "Ada wrote <span label=\"artifact\">code</span>."
|
|
145
|
+
Then the annotation span label should be "artifact"
|
|
146
|
+
|
|
147
|
+
Scenario: Link repeated spans
|
|
148
|
+
Given a biblicus request for "Alice met Bob and Bob waved."
|
|
149
|
+
When I apply mock link markup "Alice met <span id=\"link_1\">Bob</span> and <span ref=\"link_1\">Bob</span> waved."
|
|
150
|
+
Then the link spans should include ids
|
|
151
|
+
|
|
152
|
+
Scenario: Redact spans without types
|
|
153
|
+
Given a biblicus request for "The secret is safe."
|
|
154
|
+
When I apply mock redaction markup "The <span>secret</span> is safe."
|
|
155
|
+
Then the redaction should return one span
|
|
156
|
+
|
|
157
|
+
Scenario: Slice text into segments
|
|
158
|
+
Given a biblicus request for "First sentence. Second sentence."
|
|
159
|
+
When I apply mock slice markup "First sentence.<slice/> Second sentence."
|
|
160
|
+
Then the slices should be "First sentence." and " Second sentence."
|
|
161
|
+
|
|
162
|
+
Scenario: Use markup helpers
|
|
163
|
+
Given a biblicus request for "Ignored."
|
|
164
|
+
When I strip span tags from "Hello <span>world</span>."
|
|
165
|
+
Then the stripped text should be "Hello world."
|
|
166
|
+
When I parse spans from "Hello <span>world</span>."
|
|
167
|
+
Then the parsed span text should be "world"
|
|
168
|
+
When I summarize span 1 from "Hello <span>world</span>."
|
|
169
|
+
Then the summary should be "Span 1: world"
|
|
170
|
+
]])
|
|
171
|
+
|
|
172
|
+
-- Procedure body for this spec file: it declares one required string output
-- and returns a fixed message; `input` is not read.
Procedure {
  output = {
    result = field.string{required = true}
  },
  function(input)
    local outcome = {}
    outcome.result = "Biblicus text stdlib specs executed"
    return outcome
  end
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
-- Copy every key of `source` into `target`; a nil/false `source` is ignored.
local function copy_into(target, source)
  if source then
    for key, value in pairs(source) do
      target[key] = value
    end
  end
end

-- Return a new table holding `defaults` overlaid with `config` (shallow copy;
-- keys in `config` win).
local function merge_defaults(defaults, config)
  local merged = {}
  copy_into(merged, defaults)
  copy_into(merged, config)
  return merged
end

-- Merge `config` over `defaults` and, when `root` is set but `corpus_root` is
-- not, move the value from `root` to `corpus_root`.
local function normalize_corpus_config(config, defaults)
  local normalized = merge_defaults(defaults, config or {})
  if normalized.root == nil or normalized.corpus_root ~= nil then
    return normalized
  end
  normalized.corpus_root = normalized.root
  normalized.root = nil
  return normalized
end

-- Build a corpus "class" bound to `defaults`. The returned table can be
-- called directly (`Cls(config)`) or through `Cls:new(config)`; both forward
-- the normalized config to `_tactus_internal_corpus`.
local function wrap_corpus(defaults)
  local function build(config)
    return _tactus_internal_corpus(normalize_corpus_config(config, defaults))
  end

  local factory = {
    new = function(_, config)
      return build(config)
    end,
  }
  local call_hook = {
    __call = function(_, config)
      return build(config)
    end,
  }
  return setmetatable(factory, call_hook)
end

return {
  wrap_corpus = wrap_corpus,
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
local corpora_base = require("tactus.corpora.base")

-- Copy every key of `source` into `target`; a nil/false `source` is ignored.
local function copy_into(target, source)
  if source then
    for key, value in pairs(source) do
      target[key] = value
    end
  end
end

-- Return a new table holding `defaults` overlaid with `config` (shallow copy;
-- keys in `config` win).
local function merge_defaults(defaults, config)
  local merged = {}
  copy_into(merged, defaults)
  copy_into(merged, config)
  return merged
end

-- Build a retriever "class" bound to `defaults`. The returned table can be
-- called directly (`Cls(config)`) or through `Cls:new(config)`; both forward
-- the merged config to `_tactus_internal_retriever`.
local function wrap_retriever(defaults)
  local function build(config)
    local settings = merge_defaults(defaults, config or {})
    return _tactus_internal_retriever(settings)
  end

  local factory = {
    new = function(_, config)
      return build(config)
    end,
  }
  local call_hook = {
    __call = function(_, config)
      return build(config)
    end,
  }
  return setmetatable(factory, call_hook)
end

return {
  wrap_corpus = corpora_base.wrap_corpus,
  wrap_retriever = wrap_retriever,
}
|