tactus-0.31.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. tactus/__init__.py +49 -0
  2. tactus/adapters/__init__.py +9 -0
  3. tactus/adapters/broker_log.py +76 -0
  4. tactus/adapters/cli_hitl.py +189 -0
  5. tactus/adapters/cli_log.py +223 -0
  6. tactus/adapters/cost_collector_log.py +56 -0
  7. tactus/adapters/file_storage.py +367 -0
  8. tactus/adapters/http_callback_log.py +109 -0
  9. tactus/adapters/ide_log.py +71 -0
  10. tactus/adapters/lua_tools.py +336 -0
  11. tactus/adapters/mcp.py +289 -0
  12. tactus/adapters/mcp_manager.py +196 -0
  13. tactus/adapters/memory.py +53 -0
  14. tactus/adapters/plugins.py +419 -0
  15. tactus/backends/http_backend.py +58 -0
  16. tactus/backends/model_backend.py +35 -0
  17. tactus/backends/pytorch_backend.py +110 -0
  18. tactus/broker/__init__.py +12 -0
  19. tactus/broker/client.py +247 -0
  20. tactus/broker/protocol.py +183 -0
  21. tactus/broker/server.py +1123 -0
  22. tactus/broker/stdio.py +12 -0
  23. tactus/cli/__init__.py +7 -0
  24. tactus/cli/app.py +2245 -0
  25. tactus/cli/commands/__init__.py +0 -0
  26. tactus/core/__init__.py +32 -0
  27. tactus/core/config_manager.py +790 -0
  28. tactus/core/dependencies/__init__.py +14 -0
  29. tactus/core/dependencies/registry.py +180 -0
  30. tactus/core/dsl_stubs.py +2117 -0
  31. tactus/core/exceptions.py +66 -0
  32. tactus/core/execution_context.py +480 -0
  33. tactus/core/lua_sandbox.py +508 -0
  34. tactus/core/message_history_manager.py +236 -0
  35. tactus/core/mocking.py +286 -0
  36. tactus/core/output_validator.py +291 -0
  37. tactus/core/registry.py +499 -0
  38. tactus/core/runtime.py +2907 -0
  39. tactus/core/template_resolver.py +142 -0
  40. tactus/core/yaml_parser.py +301 -0
  41. tactus/docker/Dockerfile +61 -0
  42. tactus/docker/entrypoint.sh +69 -0
  43. tactus/dspy/__init__.py +39 -0
  44. tactus/dspy/agent.py +1144 -0
  45. tactus/dspy/broker_lm.py +181 -0
  46. tactus/dspy/config.py +212 -0
  47. tactus/dspy/history.py +196 -0
  48. tactus/dspy/module.py +405 -0
  49. tactus/dspy/prediction.py +318 -0
  50. tactus/dspy/signature.py +185 -0
  51. tactus/formatting/__init__.py +7 -0
  52. tactus/formatting/formatter.py +437 -0
  53. tactus/ide/__init__.py +9 -0
  54. tactus/ide/coding_assistant.py +343 -0
  55. tactus/ide/server.py +2223 -0
  56. tactus/primitives/__init__.py +49 -0
  57. tactus/primitives/control.py +168 -0
  58. tactus/primitives/file.py +229 -0
  59. tactus/primitives/handles.py +378 -0
  60. tactus/primitives/host.py +94 -0
  61. tactus/primitives/human.py +342 -0
  62. tactus/primitives/json.py +189 -0
  63. tactus/primitives/log.py +187 -0
  64. tactus/primitives/message_history.py +157 -0
  65. tactus/primitives/model.py +163 -0
  66. tactus/primitives/procedure.py +564 -0
  67. tactus/primitives/procedure_callable.py +318 -0
  68. tactus/primitives/retry.py +155 -0
  69. tactus/primitives/session.py +152 -0
  70. tactus/primitives/state.py +182 -0
  71. tactus/primitives/step.py +209 -0
  72. tactus/primitives/system.py +93 -0
  73. tactus/primitives/tool.py +375 -0
  74. tactus/primitives/tool_handle.py +279 -0
  75. tactus/primitives/toolset.py +229 -0
  76. tactus/protocols/__init__.py +38 -0
  77. tactus/protocols/chat_recorder.py +81 -0
  78. tactus/protocols/config.py +97 -0
  79. tactus/protocols/cost.py +31 -0
  80. tactus/protocols/hitl.py +71 -0
  81. tactus/protocols/log_handler.py +27 -0
  82. tactus/protocols/models.py +355 -0
  83. tactus/protocols/result.py +33 -0
  84. tactus/protocols/storage.py +90 -0
  85. tactus/providers/__init__.py +13 -0
  86. tactus/providers/base.py +92 -0
  87. tactus/providers/bedrock.py +117 -0
  88. tactus/providers/google.py +105 -0
  89. tactus/providers/openai.py +98 -0
  90. tactus/sandbox/__init__.py +63 -0
  91. tactus/sandbox/config.py +171 -0
  92. tactus/sandbox/container_runner.py +1099 -0
  93. tactus/sandbox/docker_manager.py +433 -0
  94. tactus/sandbox/entrypoint.py +227 -0
  95. tactus/sandbox/protocol.py +213 -0
  96. tactus/stdlib/__init__.py +10 -0
  97. tactus/stdlib/io/__init__.py +13 -0
  98. tactus/stdlib/io/csv.py +88 -0
  99. tactus/stdlib/io/excel.py +136 -0
  100. tactus/stdlib/io/file.py +90 -0
  101. tactus/stdlib/io/fs.py +154 -0
  102. tactus/stdlib/io/hdf5.py +121 -0
  103. tactus/stdlib/io/json.py +109 -0
  104. tactus/stdlib/io/parquet.py +83 -0
  105. tactus/stdlib/io/tsv.py +88 -0
  106. tactus/stdlib/loader.py +274 -0
  107. tactus/stdlib/tac/tactus/tools/done.tac +33 -0
  108. tactus/stdlib/tac/tactus/tools/log.tac +50 -0
  109. tactus/testing/README.md +273 -0
  110. tactus/testing/__init__.py +61 -0
  111. tactus/testing/behave_integration.py +380 -0
  112. tactus/testing/context.py +486 -0
  113. tactus/testing/eval_models.py +114 -0
  114. tactus/testing/evaluation_runner.py +222 -0
  115. tactus/testing/evaluators.py +634 -0
  116. tactus/testing/events.py +94 -0
  117. tactus/testing/gherkin_parser.py +134 -0
  118. tactus/testing/mock_agent.py +315 -0
  119. tactus/testing/mock_dependencies.py +234 -0
  120. tactus/testing/mock_hitl.py +171 -0
  121. tactus/testing/mock_registry.py +168 -0
  122. tactus/testing/mock_tools.py +133 -0
  123. tactus/testing/models.py +115 -0
  124. tactus/testing/pydantic_eval_runner.py +508 -0
  125. tactus/testing/steps/__init__.py +13 -0
  126. tactus/testing/steps/builtin.py +902 -0
  127. tactus/testing/steps/custom.py +69 -0
  128. tactus/testing/steps/registry.py +68 -0
  129. tactus/testing/test_runner.py +489 -0
  130. tactus/tracing/__init__.py +5 -0
  131. tactus/tracing/trace_manager.py +417 -0
  132. tactus/utils/__init__.py +1 -0
  133. tactus/utils/cost_calculator.py +72 -0
  134. tactus/utils/model_pricing.py +132 -0
  135. tactus/utils/safe_file_library.py +502 -0
  136. tactus/utils/safe_libraries.py +234 -0
  137. tactus/validation/LuaLexerBase.py +66 -0
  138. tactus/validation/LuaParserBase.py +23 -0
  139. tactus/validation/README.md +224 -0
  140. tactus/validation/__init__.py +7 -0
  141. tactus/validation/error_listener.py +21 -0
  142. tactus/validation/generated/LuaLexer.interp +231 -0
  143. tactus/validation/generated/LuaLexer.py +5548 -0
  144. tactus/validation/generated/LuaLexer.tokens +124 -0
  145. tactus/validation/generated/LuaLexerBase.py +66 -0
  146. tactus/validation/generated/LuaParser.interp +173 -0
  147. tactus/validation/generated/LuaParser.py +6439 -0
  148. tactus/validation/generated/LuaParser.tokens +124 -0
  149. tactus/validation/generated/LuaParserBase.py +23 -0
  150. tactus/validation/generated/LuaParserVisitor.py +118 -0
  151. tactus/validation/generated/__init__.py +7 -0
  152. tactus/validation/grammar/LuaLexer.g4 +123 -0
  153. tactus/validation/grammar/LuaParser.g4 +178 -0
  154. tactus/validation/semantic_visitor.py +817 -0
  155. tactus/validation/validator.py +157 -0
  156. tactus-0.31.2.dist-info/METADATA +1809 -0
  157. tactus-0.31.2.dist-info/RECORD +160 -0
  158. tactus-0.31.2.dist-info/WHEEL +4 -0
  159. tactus-0.31.2.dist-info/entry_points.txt +2 -0
  160. tactus-0.31.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1809 @@
1
+ Metadata-Version: 2.4
2
+ Name: tactus
3
+ Version: 0.31.2
4
+ Summary: Tactus: Lua-based DSL for agentic workflows
5
+ Project-URL: Homepage, https://github.com/AnthusAI/Tactus
6
+ Project-URL: Documentation, https://github.com/AnthusAI/Tactus/tree/main/docs
7
+ Project-URL: Repository, https://github.com/AnthusAI/Tactus
8
+ Project-URL: Issues, https://github.com/AnthusAI/Tactus/issues
9
+ Author-email: Anthus <info@anthus.ai>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: agents,ai,dsl,llm,lua,workflows
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: antlr4-python3-runtime==4.13.1
23
+ Requires-Dist: behave>=1.2.6
24
+ Requires-Dist: boto3>=1.28.0
25
+ Requires-Dist: dotyaml>=0.1.0
26
+ Requires-Dist: dspy>=2.5
27
+ Requires-Dist: gherkin-official>=28.0.0
28
+ Requires-Dist: h5py>=3.10
29
+ Requires-Dist: lupa>=2.6
30
+ Requires-Dist: nanoid>=2.0.0
31
+ Requires-Dist: openai>=1.35.10
32
+ Requires-Dist: openpyxl>=3.1
33
+ Requires-Dist: pyarrow>=14.0
34
+ Requires-Dist: pydantic-ai[bedrock]
35
+ Requires-Dist: pydantic>=2.0
36
+ Requires-Dist: pyyaml
37
+ Requires-Dist: rapidfuzz>=3.0.0
38
+ Requires-Dist: rich>=13.9.4
39
+ Requires-Dist: typer
40
+ Provides-Extra: dev
41
+ Requires-Dist: antlr4-tools>=0.2.1; extra == 'dev'
42
+ Requires-Dist: behave>=1.2.6; extra == 'dev'
43
+ Requires-Dist: black; extra == 'dev'
44
+ Requires-Dist: fastmcp>=2.3.5; extra == 'dev'
45
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
46
+ Requires-Dist: pytest-xdist>=3.0; extra == 'dev'
47
+ Requires-Dist: pytest>=8.0; extra == 'dev'
48
+ Requires-Dist: python-semantic-release>=9.0.0; extra == 'dev'
49
+ Requires-Dist: ruff; extra == 'dev'
50
+ Description-Content-Type: text/markdown
51
+
52
+ # Tactus
53
+
54
+ **A programming language for reliable, tool-using AI agents.**
55
+
56
+ *Agents that never lose their place.*
57
+
58
+ Tactus is a Lua-based DSL for building agent programs: you define tools, agents, and procedures that orchestrate their work. It’s designed for **bounded autonomy**—use imperative code for the steps that must be deterministic, and agent turns for the steps that benefit from intelligence. The runtime handles durability, human-in-the-loop, tool/context control, and testing so that workflows can run for hours or days and still be shippable.
59
+
60
+ > **Status:** Alpha. APIs and syntax may change; not production-ready.
61
+
62
+ ## The Problem: Agent Scripts Don’t Scale
63
+
64
+ “Give an agent tools and a prompt” works surprisingly well when you’re there to steer. But when you run the same workflow autonomously (or thousands of times), small failure rates turn into real incidents.
65
+
66
+ Real-world agent programs need to:
67
+
68
+ - **Wait for humans**: Approval gates, reviews, input requests
69
+ - **Survive failures**: Network timeouts, API errors, process crashes
70
+ - **Run for hours or days**: Long tasks, retries, handoffs
71
+ - **Control capabilities and context**: Change tool access and the information an agent sees as the workflow progresses
72
+ - **Be testable**: Verify orchestration logic and measure reliability
73
+
74
+ Traditional frameworks help you call models, but the rest becomes infrastructure you build yourself: state machines, checkpoint tables, replay logic, HITL plumbing, and bespoke tests.
75
+
76
+ ## The Solution: Imperative Orchestration with Transparent Durability
77
+
78
+ In Tactus, the deterministic parts are just code—loops, conditionals, function calls. When you want intelligence, you take an agent turn. The runtime transparently checkpoints every agent turn, tool call, and human interaction so execution can suspend and resume safely:
79
+
80
+ ```lua
81
+ -- This looks like it runs straight through
82
+ repeat
83
+ researcher()
84
+ until Tool.called("done")
85
+
86
+ -- But here execution might suspend for days
87
+ local approved = Human.approve({message = "Deploy to production?"})
88
+
89
+ -- When the human responds, execution resumes exactly here
90
+ if approved then
91
+ deploy()
92
+ end
93
+ ```
94
+
95
+ Every agent turn, every tool call, every human interaction is automatically checkpointed. No state machines. No manual serialization. No replay logic.
96
+
97
+ ### Compare: Graph-Based vs. Imperative Durability
98
+
99
+ LangGraph does support persistence—when you compile a graph with a checkpointer, it saves state at every "super-step" (node boundary). But you're still designing a state machine:
100
+
101
+ ```python
102
+ # LangGraph: Define state, nodes, and edges explicitly
+ from typing import TypedDict
+
+ from langgraph.graph import StateGraph, END
+ from langgraph.checkpoint.sqlite import SqliteSaver
+
103
+ class State(TypedDict):
104
+ messages: list
105
+ research_complete: bool
106
+ approved: bool | None
107
+
108
+ graph = StateGraph(State)
109
+ graph.add_node("research", research_node)
110
+ graph.add_node("wait_approval", wait_approval_node)
111
+ graph.add_node("deploy", deploy_node)
112
+ graph.add_edge("research", "wait_approval")
113
+ graph.add_conditional_edges("wait_approval", route_on_approval, {
114
+ "approved": "deploy",
115
+ "rejected": END
116
+ })
117
+
118
+ # Add checkpointer for persistence
119
+ memory = SqliteSaver.from_conn_string(":memory:")
120
+ app = graph.compile(checkpointer=memory)
121
+ ```
122
+
123
+ This is powerful, but your workflow must be expressed as a graph. Nodes, edges, conditional routing. The structure is explicit.
124
+
125
+ **With Tactus**, you write imperative code. Loops, conditionals, function calls—the control flow you already know:
126
+
127
+ ```lua
128
+ repeat researcher() until Tool.called("done")
129
+ local approved = Human.approve({message = "Deploy?"})
130
+ if approved then deploy() end
131
+ ```
132
+
133
+ Same workflow. No graph definition. The runtime checkpoints every operation transparently—agent turns, tool calls, human interactions—and resumes exactly where execution left off.
134
+
135
+ The difference isn't whether checkpointing exists, but how you express your workflow. Graphs vs. imperative code. Explicit structure vs. transparent durability.
136
+
137
+ ---
138
+
139
+ ## Everything as Code
140
+
141
+ Tactus isn't just durable—it's designed for agents that build and modify other agents.
142
+
143
+ Most frameworks scatter agent logic across Python classes, decorators, YAML files, and configuration objects. This is opaque to AI. An agent can't easily read, understand, and improve its own definition when it's spread across a codebase.
144
+
145
+ Tactus takes a different approach: **the entire agent definition is a single, readable file.**
146
+
147
+ ```lua
148
+ done = tactus.done
149
+ search = mcp.brave_search.search
150
+ analyze = mcp.analyze.analyze
151
+
152
+ researcher = Agent {
153
+ model = "gpt-4o",
154
+ system_prompt = "Research the topic thoroughly.",
155
+ tools = {search, analyze, done}
156
+ }
157
+
158
+ Procedure {
159
+ input = {
160
+ topic = field.string{required = true}
161
+ },
162
+ output = {
163
+ findings = field.string{required = true}
164
+ },
165
+ function(input)
166
+ repeat
167
+ researcher()
168
+ until Tool.called("done")
169
+ return {findings = Tool.last_result("done")}
170
+ end
171
+ }
172
+
173
+ Specification([[
174
+ Feature: Research
175
+ Scenario: Completes research
176
+ When the researcher agent takes turns
177
+ Then the search tool should be called at least once
178
+ ]])
179
+ ```
180
+
181
+ Agents, orchestration, contracts, and tests—all in one file. All in a minimal syntax that fits in context windows and produces clean diffs.
182
+
183
+ This enables:
184
+
185
+ - **Self-evolution**: An agent reads its own definition, identifies improvements, rewrites itself
186
+ - **Agent-building agents**: A meta-agent that designs and iterates on specialized agents
187
+ - **Transparent iteration**: When an agent modifies code, you can diff the changes
188
+
189
+ ---
190
+
191
+ ## Safe Embedding
192
+
193
+ Tactus is designed for platforms that run user-contributed agent definitions—like n8n or Zapier, but where the automations are intelligent agents.
194
+
195
+ This requires true sandboxing. User A's agent can't escape to affect user B. It can't access the filesystem or make network calls unless you explicitly provide tools that grant those capabilities.
196
+
197
+ Python can't be safely sandboxed. Lua was designed for safe embedding, with decades of proven use in game modding, nginx modules, and Redis scripts.
198
+
199
+ Tactus agents run in a restricted Lua VM:
200
+
201
+ - No filesystem access by default
202
+ - No network access by default
203
+ - No environment variable access by default
204
+ - The tools you provide are the *only* capabilities the agent has
205
+
206
+ This makes Tactus safe for:
207
+
208
+ - Multi-tenant platforms running user-contributed agents
209
+ - Embedding in applications where untrusted code is a concern
210
+ - Letting AI agents write and execute their own orchestration logic
211
+
212
+ ---
213
+
214
+ ## Omnichannel Human-in-the-Loop
215
+
216
+ When an agent needs human input, *how* that request reaches the human depends on the channel. The agent shouldn't care.
217
+
218
+ Tactus separates the *what* from the *how*:
219
+
220
+ ```lua
221
+ local approved = Human.approve({
222
+ message = "Deploy to production?",
223
+ context = {version = "2.1.0", environment = "prod"}
224
+ })
225
+ ```
226
+
227
+ The agent declares what it needs. The platform decides how to render it:
228
+
229
+ | Channel | Rendering |
230
+ |---------|-----------|
231
+ | **Web** | Modal with Approve/Reject buttons |
232
+ | **Slack** | Interactive message with button actions |
233
+ | **SMS** | "Deploy v2.1.0 to prod? Reply YES or NO" |
234
+ | **Voice** | "Should I deploy version 2.1.0 to production?" |
235
+ | **Email** | Message with approve/reject links |
236
+
237
+ Because procedures declare typed inputs, platforms can auto-generate UI for any channel:
238
+
239
+ ```lua
240
+ main = procedure("main", {
241
+ input = {
242
+ topic = { type = "string", required = true },
243
+ depth = { type = "string", enum = {"shallow", "deep"}, default = "shallow" },
244
+ max_results = { type = "number", default = 10 },
245
+ include_sources = { type = "boolean", default = true },
246
+ tags = { type = "array", default = {} },
247
+ config = { type = "object", default = {} }
248
+ }
249
+ }, function()
250
+ -- Access inputs directly in Lua
251
+ log("Researching: " .. input.topic)
252
+ log("Depth: " .. input.depth)
253
+ log("Max results: " .. input.max_results)
254
+
255
+ -- Arrays and objects work seamlessly
256
+ for i, tag in ipairs(input.tags) do
257
+ log("Tag " .. i .. ": " .. tag)
258
+ end
259
+
260
+ -- ... rest of procedure
261
+ end)
262
+ ```
263
+
264
+ **Input Types Supported:**
265
+ - `string`: Text values with optional enums for constrained choices
266
+ - `number`: Integers and floats
267
+ - `boolean`: True/false values
268
+ - `array`: Lists of values (converted to 1-indexed Lua tables)
269
+ - `object`: Key-value dictionaries (converted to Lua tables)
270
+
271
+ **Input Sources:**
272
+ - **CLI**: Parameters via `--param`, interactive prompting, or automatic prompting for missing required inputs
273
+ - **GUI**: Modal dialog before execution with type-appropriate form controls
274
+ - **SDK**: Direct passing via `context` parameter to `runtime.execute()`
275
+
276
+ A web app renders a form. Slack renders a modal. SMS runs a structured conversation. The CLI provides interactive prompts.
277
+
278
+ One agent definition. Every channel. Type-safe inputs everywhere.
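+
+ For example, a command-line invocation might look like the following. This is a hypothetical sketch: `tactus run` and the `--param` flag are mentioned above, but the exact `key=value` syntax and the file name are assumptions.
+
+ ```bash
+ # Pass typed inputs from the CLI; missing required inputs are prompted for interactively
+ tactus run research.tac --param topic="AI agents" --param depth=deep --param max_results=5
+ ```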
279
+
280
+ ---
281
+
282
+ ## Testing Built In
283
+
284
+ When agents modify agents, verification is essential. Tactus makes BDD specifications part of the language:
285
+
286
+ ```lua
287
+ specifications([[
288
+ Feature: Research Task
289
+ Scenario: Agent completes research
290
+ Given the procedure has started
291
+ When the researcher agent takes turns
292
+ Then the search tool should be called at least once
293
+ And the done tool should be called exactly once
294
+ ]])
295
+ ```
296
+
297
+ Run tests with `tactus test`. Measure consistency with `tactus test --runs 10`. When an agent rewrites itself, the tests verify it still works.
298
+
299
+ ---
300
+
301
+ ## The Broader Context
302
+
303
+ Tactus serves a paradigm shift in programming: from anticipating every scenario to providing capabilities and goals.
304
+
305
+ Traditional code requires you to handle every case—every header name, every format, every edge condition. Miss one and your program breaks.
306
+
307
+ Agent programming inverts this: give an agent tools, describe the goal, let intelligence handle the rest.
308
+
309
+ But to run this autonomously, you need more than a prompt: you need bounded autonomy (tool + context control), durability, HITL, and tests. Tactus is the language for making “give an agent a tool” workflows reliable.
310
+
311
+ ```lua
312
+ done = tactus.done
313
+ file_contact = mcp.contacts.file_contact
314
+
315
+ importer = Agent {
316
+ system_prompt = "Extract contacts from the data. File each one you find.",
317
+ tools = {file_contact, done}
318
+ }
319
+ ```
320
+
321
+ When a new format appears—unexpected headers, mixed delimiters, a language you didn't anticipate—the agent adapts. No code changes.
322
+
323
+ See [Give an Agent a Tool](https://github.com/AnthusAI/Give-an-Agent-a-Tool) for a deep dive on this paradigm shift.
324
+
325
+ ---
326
+
327
+ ## What This Enables
328
+
329
+ **Agent platforms**: Build your own n8n/Zapier where users define intelligent agents. Tactus handles sandboxing, durability, and multi-tenancy.
330
+
331
+ **Self-evolving agents**: Agents that read their own definitions, identify improvements, and rewrite themselves.
332
+
333
+ **Agents building agents**: A meta-agent that designs, tests, and iterates on specialized agents for specific tasks.
334
+
335
+ **Omnichannel deployment**: Write agent logic once. Deploy across web, mobile, Slack, SMS, voice, email.
336
+
337
+ **Long-running workflows**: Agents that wait for humans, coordinate with external systems, and run for days without losing progress.
338
+
339
+ ---
340
+
341
+ ## Tools
342
+
343
+ Tools are the capabilities you give to agents. Tactus supports multiple ways to define and connect tools.
344
+
345
+ ### MCP Server Integration
346
+
347
+ Connect to [Model Context Protocol](https://modelcontextprotocol.io/) servers to access external tool ecosystems:
348
+
349
+ ```yaml
350
+ # .tactus/config.yml
351
+ mcp_servers:
352
+ plexus:
353
+ command: "python"
354
+ args: ["-m", "plexus.mcp"]
355
+ env:
356
+ PLEXUS_API_KEY: "${PLEXUS_API_KEY}"
357
+
358
+ filesystem:
359
+ command: "npx"
360
+ args: ["-y", "@modelcontextprotocol/server-filesystem", "/workspace"]
361
+ ```
362
+
363
+ Tools from MCP servers are accessed via the `mcp` namespace:
364
+
365
+ ```lua
366
+ done = tactus.done
367
+ score_info = mcp.plexus.score_info
368
+ read_file = mcp.filesystem.read_file
369
+
370
+ worker = Agent {
371
+ tools = {score_info, read_file, done}
372
+ }
373
+ ```
374
+
375
+ ### Inline Lua Tools
376
+
377
+ Define tools directly in your `.tac` file—no external servers required:
378
+
379
+ **Individual tools:**
380
+
381
+ ```lua
382
+ done = tactus.done
383
+
384
+ calculate_tip = Tool {
385
+ description = "Calculate tip amount for a bill",
386
+ input = {
387
+ amount = field.number{required = true},
388
+ percent = field.number{required = true}
389
+ },
390
+ function(args)
391
+ return string.format("$%.2f", args.amount * args.percent / 100)
392
+ end
393
+ }
394
+
395
+ assistant = Agent {
396
+ tools = {calculate_tip, done}
397
+ }
398
+ ```
399
+
400
+ **Grouped toolsets:**
401
+
402
+ ```lua
403
+ done = tactus.done
404
+
405
+ math_tools = Toolset {
406
+ type = "lua",
407
+ tools = {
408
+ {name = "add", input = {...}, handler = function(args) ... end},
409
+ {name = "multiply", input = {...}, handler = function(args) ... end}
410
+ }
411
+ }
412
+
413
+ calculator = Agent {
414
+ tools = {math_tools, done}
415
+ }
416
+ ```
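+
+ As a sketch of what one of the elided entries might look like when written out in full (the `a`/`b` field names and the handler body are illustrative; the `field.number{required = true}` declarations mirror the inline `Tool` example above):
+
+ ```lua
+ math_tools = Toolset {
+     type = "lua",
+     tools = {
+         {
+             name = "add",
+             input = {
+                 a = field.number{required = true},
+                 b = field.number{required = true}
+             },
+             handler = function(args)
+                 -- Direct Lua implementation, no LLM involved
+                 return args.a + args.b
+             end
+         }
+     }
+ }
+ ```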
417
+
418
+ **Inline agent tools:**
419
+
420
+ ```lua
421
+ done = tactus.done
422
+
423
+ text_processor = Agent {
424
+ inline_tools = {
425
+ {name = "uppercase", input = {...}, handler = function(args)
426
+ return string.upper(args.text)
427
+ end}
428
+ },
429
+ tools = {done}
430
+ }
431
+ ```
432
+
433
+ ### Direct Tool Invocation
434
+
435
+ Call tools directly from Lua code for deterministic control:
436
+
437
+ ```lua
438
+ -- Tool returns a callable handle - assign it for direct use
439
+ calculate_tip = Tool {
440
+ description = "Calculate tip",
441
+ input = {
442
+ amount = field.number{required = true},
443
+ percent = field.number{required = true}
444
+ },
445
+ function(args)
446
+ return args.amount * args.percent / 100
447
+ end
448
+ }
449
+
450
+ -- Call directly - no LLM involvement
451
+ local tip = calculate_tip({amount = 50, percent = 20})
452
+
453
+ -- Pass results to agent via context
454
+ summarizer({
455
+ context = {
456
+ tip_calculation = tip,
457
+ original_amount = "$50.00"
458
+ }
459
+ })
460
+ ```
461
+
462
+ ### Tool Tracking
463
+
464
+ Check which tools were called and access their results:
465
+
466
+ ```lua
467
+ if Tool.called("search") then
468
+ local result = Tool.last_result("search")
469
+ local call = Tool.last_call("search") -- {args = {...}, result = "..."}
470
+ end
471
+ ```
472
+
473
+ ### Per-Turn Tool Control
474
+
475
+ Control which tools are available on each turn—essential for patterns like tool result summarization:
476
+
477
+ ```lua
478
+ repeat
479
+ researcher() -- Has all tools
480
+
481
+ if Tool.called("search") then
482
+ -- Summarize with NO tools (prevents recursive calls)
483
+ researcher({
484
+ message = "Summarize the search results",
485
+ tools = {}
486
+ })
487
+ end
488
+ until Tool.called("done")
489
+ ```
490
+
491
+ See [docs/TOOLS.md](docs/TOOLS.md) for the complete tools reference.
492
+
493
+ ---
494
+
495
+ ## Quick Start
496
+
497
+ ### Installation
498
+
499
+ ```bash
500
+ pip install tactus
501
+ ```
502
+
503
+ ### Your First Procedure
504
+
505
+ Create `hello.tac`:
506
+
507
+ ```lua
508
+ done = tactus.done
509
+
510
+ greeter = Agent {
511
+ provider = "openai",
512
+ model = "gpt-4o-mini",
513
+ system_prompt = [[
514
+ You are a friendly greeter. Greet the user by name: {input.name}
515
+ When done, call the done tool.
516
+ ]],
517
+ tools = {done}
518
+ }
519
+
520
+ Procedure {
521
+ input = {
522
+ name = field.string{default = "World"}
523
+ },
524
+ output = {
525
+ greeting = field.string{required = true}
526
+ },
527
+ function(input)
528
+ repeat
529
+ greeter()
530
+ until Tool.called("done")
531
+
532
+ return { greeting = Tool.last_result("done") }
533
+ end
534
+ }
535
+
536
+ Specification([[
537
+ Feature: Greeting
538
+ Scenario: Agent greets and completes
539
+ When the greeter agent takes turns
540
+ Then the done tool should be called exactly once
541
+ And the procedure should complete successfully
542
+ ]])
543
+ ```
544
+
545
+ **Run it:**
546
+
547
+ ```bash
548
+ export OPENAI_API_KEY=your-key
549
+ tactus run hello.tac
550
+ ```
551
+
552
+ **Test it:**
553
+
554
+ ```bash
555
+ tactus test hello.tac
556
+ ```
557
+
558
+ **Evaluate consistency:**
559
+
560
+ ```bash
561
+ tactus test hello.tac --runs 10
562
+ ```
563
+
564
+ ---
565
+
566
+ ## Documentation
567
+
568
+ - **[SPECIFICATION.md](SPECIFICATION.md)** — Complete DSL reference
569
+ - **[IMPLEMENTATION.md](IMPLEMENTATION.md)** — Implementation status and architecture
570
+ - **[docs/TOOLS.md](docs/TOOLS.md)** — Tools and MCP integration guide
571
+ - **[docs/FILE_IO.md](docs/FILE_IO.md)** — File I/O operations guide (CSV, TSV, Parquet, HDF5, Excel)
572
+ - **[examples/](examples/)** — Example procedures
573
+
574
+ ---
575
+
576
+ ## Key Features
577
+
578
+ ### Per-Turn Tool Control
579
+
580
+ Tactus gives you fine-grained control over what tools an agent has access to on each individual turn. This enables powerful patterns like **tool result summarization**, where you want the agent to explain what a tool returned without having access to call more tools.
581
+
582
+ **The Pattern:**
583
+
584
+ ```lua
585
+ done = tactus.done
586
+ search = mcp.brave_search.search
587
+ analyze = mcp.analyze.analyze
588
+
589
+ researcher = Agent {
590
+ provider = "openai",
591
+ model = "gpt-4o",
592
+ system_prompt = "You are a research assistant.",
593
+ tools = {search, analyze, done}
594
+ }
595
+
596
+ Procedure {
597
+ function(input)
598
+ repeat
599
+ -- Main call: agent has all tools
600
+ researcher()
601
+
602
+ -- After each tool call, ask agent to summarize with NO tools
603
+ if Tool.called("search") or Tool.called("analyze") then
604
+ researcher({
605
+ message = "Summarize the tool results above in 2-3 sentences",
606
+ tools = {} -- No tools for this call!
607
+ })
608
+ end
609
+
610
+ until Tool.called("done")
611
+ end
612
+ }
613
+ ```
614
+
615
+ This creates a rhythm: **tool call → summarization → tool call → summarization → done**
616
+
617
+ **Why this matters:**
618
+
619
+ Without per-call control, an agent might call another tool when you just want it to explain the previous result. By temporarily restricting the toolset to an empty set (`tools = {}`), you ensure the agent focuses on summarization.
620
+
621
+ **Other per-call overrides:**
622
+
623
+ ```lua
624
+ -- Override model parameters for one call
625
+ researcher({
626
+ message = "Be creative with this summary",
627
+ temperature = 0.9,
628
+ max_tokens = 500
629
+ })
630
+
631
+ -- Restrict to specific tools only
632
+ researcher({
633
+ tools = {search, done} -- No analyze for this call
634
+ })
635
+ ```
636
+
637
+ See `examples/14-feature-per-turn-tools.tac` for a complete working example.
638
+
639
+ ### Checkpointed Steps (Determinism)
640
+
641
+ For durable execution, any operation that touches external state (randomness, time, APIs not in tools) must be checkpointed. Tactus provides `Step.checkpoint` for this:
642
+
643
+ ```lua
644
+ -- Non-deterministic operation wrapped in checkpoint
645
+ local data = Step.checkpoint(function()
646
+ return http_get("https://api.example.com/data")
647
+ end)
648
+
649
+ -- On replay, the function is NOT called again.
650
+ -- The previously saved 'data' is returned immediately.
651
+ ```
652
+
653
+ This ensures that when a procedure resumes after a pause (e.g. waiting for a human), it doesn't re-execute side effects or get different random values.
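+
+ The same guarantee applies to any non-deterministic value, not just external calls. A minimal sketch, assuming the sandbox exposes Lua's standard `math.random`:
+
+ ```lua
+ -- The roll is recorded on first execution; on replay the saved value
+ -- is returned instead of rolling again.
+ local roll = Step.checkpoint(function()
+     return math.random(1, 6)
+ end)
+ ```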
654
+
655
+ ### File I/O Operations
656
+
657
+ Tactus provides safe file I/O operations for reading and writing data files, with all operations restricted to the current working directory for security.
658
+
659
+ **Supported Formats:**
660
+ - **CSV/TSV** — Tabular data with automatic header handling
661
+ - **JSON** — Structured data using File.read/write with Json.encode/decode
662
+ - **Parquet** — Columnar storage for analytics (via pyarrow)
663
+ - **HDF5** — Scientific data with multiple datasets (via h5py)
664
+ - **Excel** — Spreadsheets with sheet support (via openpyxl)
665
+ - **Raw text** — Plain text files and configurations
666
+
667
+ **Example:**
668
+ ```lua
669
+ -- Read CSV data
670
+ local data = Csv.read("sales.csv")
671
+
672
+ -- Process data (0-indexed access)
673
+ for i = 0, data:len() - 1 do
674
+ local row = data[i]
675
+ -- process row...
676
+ end
677
+
678
+ -- Write results
679
+ Csv.write("results.csv", processed_data)
680
+ ```
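+
+ For JSON, the formats list above points to `File.read`/`File.write` combined with `Json.encode`/`Json.decode`. A minimal sketch, assuming `File.read` returns the file contents as a string and `File.write` takes a path and a string:
+
+ ```lua
+ -- Read and decode structured data
+ local raw = File.read("settings.json")
+ local settings = Json.decode(raw)
+
+ -- Modify and write back
+ settings.processed = true
+ File.write("settings.json", Json.encode(settings))
+ ```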
681
+
682
+ See [`docs/FILE_IO.md`](docs/FILE_IO.md) for the complete API reference and [`examples/52-file-io-basics.tac`](examples/52-file-io-basics.tac) through [`examples/58-text-file-io.tac`](examples/58-text-file-io.tac) for working examples.
683
+
684
+ ### Testing & Evaluation: Two Different Concerns
685
+
686
+ Tactus provides two complementary approaches for ensuring quality, each targeting a different aspect of your agentic workflow:
687
+
688
+ #### Behavior Specifications (BDD): Testing Workflow Logic
689
+
690
+ **What it tests:** The deterministic control flow of your procedure—the Lua code that orchestrates agents, handles conditionals, manages state, and coordinates tools.
691
+
692
+ **When to use:**
693
+ - Complex procedures with branching logic, loops, and state management
694
+ - Multi-agent coordination patterns
695
+ - Error handling and edge cases
696
+ - Procedures where the *orchestration* is more complex than the *intelligence*
697
+
698
+ **How it works:**
699
+ ```lua
700
+ specifications([[
701
+ Feature: Multi-Agent Research Workflow
702
+
703
+ Scenario: Researcher delegates to summarizer
704
+ Given the procedure has started
705
+ When the researcher agent takes 3 turns
706
+ Then the search tool should be called at least once
707
+ And the researcher should call the delegate tool
708
+ And the summarizer agent should take at least 1 turn
709
+ And the done tool should be called exactly once
710
+ ]])
711
+ ```
712
+
713
+ **Key characteristics:**
714
+ - Uses Gherkin syntax (Given/When/Then)
715
+ - Runs with `tactus test`
716
+ - Can use mocks to isolate logic from LLM behavior
717
+ - Deterministic: same input → same execution path
718
+ - Fast: tests orchestration without expensive API calls
719
+ - Measures: "Did the code execute correctly?"
720
+
721
+ #### Gherkin Step Reference
722
+
723
+ Tactus provides a rich library of built-in steps for BDD testing. You can use these immediately in your `specifications` block:
724
+
725
+ **Tool Steps:**
726
+ ```gherkin
727
+ Then the search tool should be called
728
+ Then the search tool should not be called
729
+ Then the search tool should be called at least 3 times
730
+ Then the search tool should be called exactly 2 times
731
+ Then the search tool should be called with query=test
732
+ ```
733
+
734
+ **State Steps:**
735
+ ```gherkin
736
+ Given the procedure has started
737
+ Then the state count should be 5
738
+ Then the state error should exist
739
+ ```
740
+
741
+ **Completion & Iteration Steps:**
742
+ ```gherkin
743
+ Then the procedure should complete successfully
744
+ Then the procedure should fail
745
+ Then the total iterations should be less than 10
746
+ Then the agent should take at least 3 turns
747
+ ```
748
+
749
+ **Custom Steps:**
750
+ Define your own steps in Lua:
751
+ ```lua
752
+ step("the research quality is high", function()
753
+ local results = State.get("results")
754
+ assert(#results > 5, "Not enough results")
755
+ end)
756
+ ```
757
+
758
+ See [tactus/testing/README.md](tactus/testing/README.md) for the complete reference.
759
+
760
+ #### Evaluations: Testing Agent Intelligence
761
+
762
+ **What it tests:** The probabilistic quality of LLM outputs—whether agents produce correct, helpful, and consistent results.
763
+
764
+ **When to use:**
765
+ - Simple "LLM wrapper" procedures (minimal orchestration logic)
766
+ - Measuring output quality (accuracy, tone, format)
767
+ - Testing prompt effectiveness
768
+ - Consistency across multiple runs
769
+ - Procedures where the *intelligence* is more important than the *orchestration*
770
+
771
+ **How it works:**
772
+ ```lua
773
+ evaluations {
774
+ runs = 10, -- Run each test case 10 times
775
+ parallel = true,
776
+
777
+ dataset = {
778
+ {
779
+ name = "greeting_task",
780
+ inputs = {task = "Greet Alice warmly"}
781
+ },
782
+ {
783
+ name = "haiku_task",
784
+ inputs = {task = "Write a haiku about AI"}
785
+ }
786
+ },
787
+
788
+ evaluators = {
789
+ -- Check for required content
790
+ {
791
+ type = "contains",
792
+ field = "output",
793
+ value = "TASK_COMPLETE:"
794
+ },
795
+
796
+ -- Use LLM to judge quality
797
+ {
798
+ type = "llm_judge",
799
+ rubric = [[
800
+ Score 1.0 if the agent:
801
+ - Completed the task successfully
802
+ - Produced high-quality output
803
+ - Called the done tool appropriately
804
+ Score 0.0 otherwise.
805
+ ]],
806
+ model = "openai:gpt-4o-mini"
807
+ }
808
+ }
809
+ }
810
+ ```
811
+
812
+ **Key characteristics:**
813
+ - Uses Pydantic AI Evals framework
814
+ - Runs with `tactus eval`
815
+ - Uses real LLM calls (not mocked)
816
+ - Probabilistic: same input → potentially different outputs
817
+ - Slower: makes actual API calls
818
+ - Measures: "Did the AI produce good results?"
819
+ - Provides success rates, consistency metrics, and per-task breakdowns
820
+
821
+ #### When to Use Which?
822
+
823
+ | Feature | Behavior Specifications (BDD) | Evaluations |
824
+ |---------|-------------------------------|-------------|
825
+ | **Goal** | Verify deterministic logic | Measure probabilistic quality |
826
+ | **Command (Single)** | `tactus test` | `tactus eval` |
827
+ | **Command (Repeat)** | `tactus test --runs 10` (consistency check) | `tactus eval --runs 10` |
828
+ | **Execution** | Fast, mocked (optional) | Slow, real API calls |
829
+ | **Syntax** | Gherkin (`Given`/`When`/`Then`) | Lua configuration table |
830
+ | **Example** | "Did the agent call the tool?" | "Did the agent write a good poem?" |
831
+ | **Best for** | Complex orchestration, state management | LLM output quality, prompt tuning |
832
+
833
+ **Use Behavior Specifications when:**
834
+ - You have complex orchestration logic to test
835
+ - You need fast, deterministic tests
836
+ - You want to verify control flow (loops, conditionals, state)
837
+ - You're testing multi-agent coordination patterns
838
+ - Example: [`examples/20-bdd-complete.tac`](examples/20-bdd-complete.tac)
839
+
840
+ **Use Evaluations when:**
841
+ - Your procedure is mostly an LLM call wrapper
842
+ - You need to measure output quality (accuracy, tone)
843
+ - You want to test prompt effectiveness
844
+ - You need consistency metrics across runs
845
+ - Example: [`examples/36-eval-advanced.tac`](examples/36-eval-advanced.tac)
846
+
847
+ **Use Both when:**
848
+ - You have complex orchestration AND care about output quality
849
+ - Run BDD tests for fast feedback on logic
850
+ - Run evaluations periodically to measure LLM performance
851
+ - Example: [`examples/37-eval-comprehensive.tac`](examples/37-eval-comprehensive.tac)
852
+
853
+ **The key insight:** Behavior specifications test your *code*. Evaluations test your *AI*. Most real-world procedures need both.
854
+
894
+ #### Advanced Evaluation Features
895
+
896
+ Tactus evaluations support powerful features for real-world testing:
897
+
898
+ **External Dataset Loading:**
899
+
900
+ Load evaluation cases from external files for better scalability:
901
+
902
+ ```lua
903
+ evaluations {
904
+ -- Load from JSONL file (one case per line)
905
+ dataset_file = "data/eval_cases.jsonl",
906
+
907
+ -- Can also include inline cases (combined with file)
908
+ dataset = {
909
+ {name = "inline_case", inputs = {...}}
910
+ },
911
+
912
+ evaluators = {...}
913
+ }
914
+ ```
915
+
916
+ Supported formats: `.jsonl`, `.json` (array), `.csv`
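+
+ An illustrative `data/eval_cases.jsonl` with one case per line (the `name`/`inputs` fields mirror the inline `dataset` cases above; the exact schema is an assumption):
+
+ ```jsonl
+ {"name": "greeting_task", "inputs": {"task": "Greet Alice warmly"}}
+ {"name": "haiku_task", "inputs": {"task": "Write a haiku about AI"}}
+ ```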
917
+
918
+ **Trace Inspection:**
919
+
920
+ Evaluators can inspect execution internals beyond just inputs/outputs:
921
+
922
+ ```lua
923
+ evaluators = {
924
+ -- Verify specific tool was called
925
+ {
926
+ type = "tool_called",
927
+ value = "search",
928
+ min_value = 1,
929
+ max_value = 3
930
+ },
931
+
932
+ -- Check agent turn count
933
+ {
934
+ type = "agent_turns",
935
+ field = "researcher",
936
+ min_value = 2,
937
+ max_value = 5
938
+ },
939
+
940
+ -- Verify state variable
941
+ {
942
+ type = "state_check",
943
+ field = "research_complete",
944
+ value = true
945
+ }
946
+ }
947
+ ```
948
+
949
+ **Advanced Evaluator Types:**
950
+
951
+ ```lua
952
+ evaluators = {
953
+ -- Regex pattern matching
954
+ {
955
+ type = "regex",
956
+ field = "phone",
957
+ value = "\\(\\d{3}\\) \\d{3}-\\d{4}"
958
+ },
959
+
960
+ -- JSON schema validation
961
+ {
962
+ type = "json_schema",
963
+ field = "data",
964
+ value = {
965
+ type = "object",
966
+ properties = {
967
+ name = {type = "string"},
968
+ age = {type = "number"}
969
+ },
970
+ required = {"name"}
971
+ }
972
+ },
973
+
974
+ -- Numeric range checking
975
+ {
976
+ type = "range",
977
+ field = "score",
978
+ value = {min = 0, max = 100}
979
+ }
980
+ }
981
+ ```
982
+
983
+ **CI/CD Thresholds:**
984
+
985
+ Define quality gates that fail the build if not met:
986
+
987
+ ```lua
988
+ evaluations {
989
+ dataset = {...},
990
+ evaluators = {...},
991
+
992
+ -- Quality thresholds for CI/CD
993
+ thresholds = {
994
+ min_success_rate = 0.90, -- Fail if < 90% pass
995
+ max_cost_per_run = 0.01, -- Fail if too expensive
996
+ max_duration = 10.0, -- Fail if too slow (seconds)
997
+ max_tokens_per_run = 500 -- Fail if too many tokens
998
+ }
999
+ }
1000
+ ```
1001
+
1002
+ When thresholds are not met, `tactus eval` exits with code 1, enabling CI/CD integration.
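+
+ Because the exit code signals threshold failures, evaluations can gate a pipeline directly. An illustrative CI step (the procedure file name is a placeholder):
+
+ ```bash
+ # Non-zero exit fails the CI job when any threshold is not met
+ tactus eval my_procedure.tac --runs 10
+ ```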
1003
+
1004
+ **See examples:**
1005
+ - [`examples/34-eval-dataset.tac`](examples/34-eval-dataset.tac) - External dataset loading
1006
+ - [`examples/35-eval-trace.tac`](examples/35-eval-trace.tac) - Trace-based evaluators
1007
+ - [`examples/36-eval-advanced.tac`](examples/36-eval-advanced.tac) - Regex, JSON schema, range
1008
+ - [`examples/33-eval-thresholds.tac`](examples/33-eval-thresholds.tac) - CI/CD quality gates
1009
+ - [`examples/37-eval-comprehensive.tac`](examples/37-eval-comprehensive.tac) - All features combined
1010
+
1011
+ ### Multi-Model and Multi-Provider Support
1012
+
1013
+ Use different models and providers for different tasks within the same workflow. **Every agent must specify a provider** (either directly via `provider = ...` or via a `default_provider` set at the procedure level).
1014
+
1015
+ **Supported providers:** `openai`, `bedrock`
1016
+
1017
+ **Mix models for different capabilities:**
1018
+
1019
+ ```lua
1020
+ done = tactus.done
1021
+ search = mcp.brave_search.search
1022
+
1023
+ researcher = Agent {
1024
+ provider = "openai",
1025
+ model = "gpt-4o", -- Use GPT-4o for complex research
1026
+ system_prompt = "Research the topic thoroughly...",
1027
+ tools = {search, done}
1028
+ }
1029
+
1030
+ summarizer = Agent {
1031
+ provider = "openai",
1032
+ model = "gpt-4o-mini", -- Use GPT-4o-mini for simple summarization
1033
+ system_prompt = "Summarize the findings concisely...",
1034
+ tools = {done}
1035
+ }
1036
+ ```
1037
+
1038
+ **Mix providers (OpenAI + Bedrock):**
1039
+
1040
+ ```lua
1041
+ done = tactus.done
1042
+
1043
+ openai_analyst = Agent {
1044
+ provider = "openai",
1045
+ model = "gpt-4o",
1046
+ system_prompt = "Analyze the data...",
1047
+ tools = {done}
1048
+ }
1049
+
1050
+ bedrock_reviewer = Agent {
1051
+ provider = "bedrock",
1052
+ model = "anthropic.claude-3-5-sonnet-20240620-v1:0",
1053
+ system_prompt = "Review the analysis...",
1054
+ tools = {done}
1055
+ }
1056
+ ```
1057
+
1058
+ **Configure model-specific parameters:**
1059
+
1060
+ ```lua
1061
+ done = tactus.done
1062
+
1063
+ creative_writer = Agent {
1064
+ provider = "openai",
1065
+ model = {
1066
+ name = "gpt-4o",
1067
+ temperature = 0.9, -- Higher creativity
1068
+ max_tokens = 2000
1069
+ },
1070
+ system_prompt = "Write creatively...",
1071
+ tools = {done}
1072
+ }
1073
+
1074
+ reasoning_agent = Agent {
1075
+ provider = "openai",
1076
+ model = {
1077
+ name = "gpt-5", -- Reasoning model
1078
+ openai_reasoning_effort = "high",
1079
+ max_tokens = 4000
1080
+ },
1081
+ system_prompt = "Solve this complex problem...",
1082
+ tools = {done}
1083
+ }
1084
+ ```
1085
+
1086
+ **Configuration via `.tactus/config.yml`:**
1087
+
1088
+ ```yaml
1089
+ # OpenAI credentials
1090
+ openai_api_key: sk-...
1091
+
1092
+ # AWS Bedrock credentials
1093
+ aws_access_key_id: AKIA...
1094
+ aws_secret_access_key: ...
1095
+ aws_default_region: us-east-1
1096
+
1097
+ # Optional defaults
1098
+ default_provider: openai
1099
+ default_model: gpt-4o
1100
+ ```
1101
+
1102
+ ### DSPy Integration
1103
+
1104
+ Tactus provides first-class support for **DSPy Modules and Signatures**, enabling you to build declarative, self-optimizing AI components directly within your agent workflows.
1105
+
1106
+ **Modules & Signatures:**
1107
+
1108
+ Instead of hand-tuning prompts, define what you want the model to do using typed signatures:
1109
+
1110
+ ```lua
1111
+ -- Configure the Language Model for DSPy
1112
+ LM("openai/gpt-4o")
1113
+
1114
+ -- Define a module with a typed signature
1115
+ summarizer = Module {
1116
+ signature = "text -> summary",
1117
+ strategy = "chain_of_thought" -- Use Chain of Thought reasoning
1118
+ }
1119
+
1120
+ -- Or define complex signatures with specific fields
1121
+ classifier = Module {
1122
+ signature = Signature {
1123
+ input = {
1124
+ text = field.string{description = "The customer email to classify"}
1125
+ },
1126
+ output = {
1127
+ category = field.string{description = "Support category (Billing, Tech, Sales)"},
1128
+ priority = field.string{description = "Priority level (Low, High, Critical)"}
1129
+ }
1130
+ },
1131
+ strategy = "predict"
1132
+ }
1133
+ ```
1134
+
1135
+ **Using Modules:**
1136
+
1137
+ Modules are callable just like Agents or Tools:
1138
+
1139
+ ```lua
1140
+ Procedure {
1141
+ function(input)
1142
+ -- Call the module
1143
+ local result = classifier({text = input.email})
1144
+
1145
+ if result.priority == "Critical" then
1146
+ human_escalation({context = result})
1147
+ else
1148
+ auto_responder({category = result.category})
1149
+ end
1150
+ end
1151
+ }
1152
+ ```
1153
+
1154
+ This brings the power of DSPy's programmable LLM interfaces into Tactus's durable, orchestrated environment.
1155
+
1156
+ ### Asynchronous Execution
1157
+
1158
+ Tactus is built on **async I/O** from the ground up, making it ideal for LLM-based workflows where you spend most of your time waiting for API responses.
1159
+
1160
+ **Why async I/O matters for LLMs:**
1161
+
1162
+ - **Not multi-threading**: Async I/O uses a single thread with cooperative multitasking
1163
+ - **Perfect for I/O-bound tasks**: While waiting for one LLM response, handle other requests
1164
+ - **Efficient resource usage**: No thread overhead, minimal memory footprint
1165
+ - **Natural for LLM workflows**: Most time is spent waiting for API calls, not computing
1166
+
1167
+ **Spawn async procedures:**
1168
+
1169
+ ```lua
1170
+ -- Start multiple research tasks in parallel
1171
+ local handles = {}
1172
+ for _, topic in ipairs(topics) do
1173
+ handles[topic] = Procedure.spawn("researcher", {query = topic})
1174
+ end
1175
+
1176
+ -- Wait for all to complete
1177
+ Procedure.wait_all(handles)
1178
+
1179
+ -- Collect results
1180
+ local results = {}
1181
+ for topic, handle in pairs(handles) do
1182
+ results[topic] = Procedure.result(handle)
1183
+ end
1184
+ ```
1185
+
1186
+ **Check status and wait with timeout:**
1187
+
1188
+ ```lua
1189
+ local handle = Procedure.spawn("long_task", params)
1190
+
1191
+ -- Check status without blocking
1192
+ local status = Procedure.status(handle)
1193
+ if status.waiting_for_human then
1194
+ notify_channel("Task waiting for approval")
1195
+ end
1196
+
1197
+ -- Wait with timeout
1198
+ local result = Procedure.wait(handle, {timeout = 300})
1199
+ if not result then
1200
+ Log.warn("Task timed out")
1201
+ end
1202
+ ```
1203
+
1204
+ ### Context Engineering
1205
+
1206
+ Tactus gives you fine-grained control over what each agent sees in the conversation history. This is crucial for multi-agent workflows where different agents need different perspectives.
1207
+
1208
+ **Message classification with `humanInteraction`:**
1209
+
1210
+ Every message has a classification that determines visibility:
1211
+
1212
+ - `INTERNAL`: Agent reasoning, hidden from humans
1213
+ - `CHAT`: Normal human-AI conversation
1214
+ - `NOTIFICATION`: Progress updates to humans
1215
+ - `PENDING_APPROVAL`: Waiting for human approval
1216
+ - `PENDING_INPUT`: Waiting for human input
1217
+ - `PENDING_REVIEW`: Waiting for human review
1218
+
1219
+ **Filter conversation history per agent:**
1220
+
1221
+ ```lua
1222
+ done = tactus.done
1223
+ search = mcp.brave_search.search
1224
+ analyze = mcp.analyze.analyze
1225
+
1226
+ worker = Agent {
1227
+ system_prompt = "Process the task...",
1228
+ tools = {search, analyze, done},
1229
+
1230
+ -- Control what this agent sees
1231
+ filter = {
1232
+ class = "ComposedFilter",
1233
+ chain = {
1234
+ {
1235
+ class = "TokenBudget",
1236
+ max_tokens = 120000
1237
+ },
1238
+ {
1239
+ class = "LimitToolResults",
1240
+ count = 2 -- Only show last 2 tool results
1241
+ }
1242
+ }
1243
+ }
1244
+ }
1245
+ ```
1246
+
1247
+ **Manage session state programmatically:**
1248
+
1249
+ ```lua
1250
+ -- Inject context for the next turn
1251
+ Session.inject_system("Focus on the security implications")
1252
+
1253
+ -- Access conversation history
1254
+ local history = Session.history()
1255
+
1256
+ -- Clear history for a fresh start
1257
+ Session.clear()
1258
+
1259
+ -- Save/load conversation state
1260
+ Session.save_to_node(checkpoint_node)
1261
+ Session.load_from_node(checkpoint_node)
1262
+ ```
1263
+
1264
+ **Why this matters:**
1265
+
1266
+ - **Token efficiency**: Keep context within model limits
1267
+ - **Agent specialization**: Each agent sees only what's relevant to its role
1268
+ - **Privacy**: Hide sensitive information from certain agents
1269
+ - **Debugging**: Control visibility for testing and development
1270
+
1271
+ ### Advanced HITL Patterns
1272
+
1273
+ Beyond the omnichannel HITL described earlier, Tactus provides detailed primitives for human oversight and collaboration. You can request approval, input, or review at any point in your workflow.
1274
+
1275
+ **Request approval before critical actions:**
1276
+
1277
+ ```lua
1278
+ local approved = Human.approve({
1279
+ message = "Deploy to production?",
1280
+ context = {environment = "prod", version = "2.1.0"},
1281
+ timeout = 3600, -- seconds
1282
+ default = false
1283
+ })
1284
+
1285
+ if approved then
1286
+ deploy_to_production()
1287
+ else
1288
+ Log.info("Deployment cancelled by operator")
1289
+ end
1290
+ ```
1291
+
1292
+ **Request human input:**
1293
+
1294
+ ```lua
1295
+ local topic = Human.input({
1296
+ message = "What topic should I research next?",
1297
+ placeholder = "Enter a topic...",
1298
+ timeout = nil -- wait forever
1299
+ })
1300
+
1301
+ if topic then
1302
+ Procedure.run("researcher", {query = topic})
1303
+ end
1304
+ ```
1305
+
1306
+ **Request review of generated content:**
1307
+
1308
+ ```lua
1309
+ local review = Human.review({
1310
+ message = "Please review this generated document",
1311
+ artifact = generated_content,
1312
+ artifact_type = "document",
1313
+ options = {
1314
+ {label = "Approve", type = "action"},
1315
+ {label = "Reject", type = "cancel"},
1316
+ {label = "Revise", type = "action"}
1317
+ },
1318
+ timeout = 86400 -- 24 hours
1319
+ })
1320
+
1321
+ if review.decision == "Approve" then
1322
+ publish(generated_content)
1323
+ elseif review.decision == "Revise" then
1324
+ State.set("human_feedback", review.feedback)
1325
+ -- retry with feedback
1326
+ end
1327
+ ```
1328
+
1329
+ **Declare HITL points for reusable workflows:**
1330
+
1331
+ ```lua
1332
+ hitl("confirm_publish", {
1333
+ type = "approval",
1334
+ message = "Publish this document to production?",
1335
+ timeout = 3600,
1336
+ default = false
1337
+ })
1338
+ ```
1339
+
1340
+ Then reference them in your procedure:
1341
+
1342
+ ```lua
1343
+ local approved = Human.approve("confirm_publish")
1344
+ ```
1345
+
1346
+ **System Alerts:**
1347
+
1348
+ Send alerts to your monitoring infrastructure (Datadog, PagerDuty) directly from the workflow:
1349
+
1350
+ ```lua
1351
+ System.alert({
1352
+ message = "Failure rate exceeded threshold",
1353
+ level = "error", -- info, warning, error, critical
1354
+ context = {
1355
+ current_rate = 0.15,
1356
+ threshold = 0.05
1357
+ }
1358
+ })
1359
+ ```
1360
+
1361
+ ### Cost Tracking & Metrics
1362
+
1363
+ Tactus provides **comprehensive cost and performance tracking** for all LLM calls. Every agent interaction is monitored with detailed metrics, giving you complete visibility into costs, performance, and behavior.
1364
+
1365
+ **Real-time cost reporting:**
1366
+
1367
+ ```
1368
+ 💰 Cost researcher: $0.000375 (250 tokens, gpt-4o-mini, 1.2s)
1369
+ 💰 Cost summarizer: $0.000750 (500 tokens, gpt-4o, 2.1s)
1370
+
1371
+ ✓ Procedure completed: 2 iterations, 3 tools used
1372
+
1373
+ 💰 Cost Summary
1374
+ Total Cost: $0.001125
1375
+ Total Tokens: 750
1376
+
1377
+ Per-call breakdown:
1378
+ researcher: $0.000375 (250 tokens, 1.2s)
1379
+ summarizer: $0.000750 (500 tokens, 2.1s)
1380
+ ```
1381
+
1382
+ **Comprehensive metrics tracked:**
1383
+
1384
+ - **Cost**: Prompt cost, completion cost, total cost (calculated from model pricing)
1385
+ - **Tokens**: Prompt tokens, completion tokens, total tokens, cached tokens
1386
+ - **Performance**: Duration, latency (time to first token)
1387
+ - **Reliability**: Retry count, validation errors
1388
+ - **Efficiency**: Cache hits, cache savings
1389
+ - **Context**: Message count, new messages per turn
1390
+ - **Metadata**: Request ID, model version, temperature, max tokens
1391
+
1392
+ **Visibility everywhere:**
1393
+
1394
+ - **CLI**: Real-time cost logging per call + summary at end
1395
+ - **IDE**: Collapsible cost events with primary metrics visible, detailed metrics expandable
1396
+ - **Tests**: Cost tracking during test runs
1397
+ - **Evaluations**: Aggregate costs across multiple runs
1398
+
1399
+ **Collapsible IDE display:**
1400
+
1401
+ The IDE shows a clean summary by default (agent, cost, tokens, model, duration) with a single click to expand full details including cost breakdown, performance metrics, retry information, cache statistics, and request metadata.
1402
+
1403
+ This helps you:
1404
+ - **Optimize costs**: Identify expensive agents and calls
1405
+ - **Debug performance**: Track latency and duration issues
1406
+ - **Monitor reliability**: See retry patterns and validation failures
1407
+ - **Measure efficiency**: Track cache hit rates and savings
1408
+
1409
+ ## Philosophy & Research
1410
+
1411
+ Tactus is built on the convergence of two critical insights: the necessity of **Self-Evolution** for future intelligence, and the requirement for **Bounded Control** in present-day production.
1412
+
1413
+ ### 1. The Substrate for Self-Evolution
1414
+
1415
+ The path to Artificial Super Intelligence (ASI) lies in **Self-Evolving Agents**—systems that can adapt and improve their own components over time. A major 2025 survey, *[A Survey of Self-Evolving Agents](https://arxiv.org/abs/2507.21046)*, identifies four dimensions where evolution must occur:
1416
+
1417
+ * **Models**: Optimizing prompts and fine-tuning weights.
1418
+ * **Memory**: Accumulating and refining experience.
1419
+ * **Tools**: Creating and mastering new capabilities.
1420
+ * **Architecture**: Rewriting the flow of logic and interaction.
1421
+
1422
+ **The "Agent as Code" Advantage**
1423
+
1424
+ For an agent to evolve, it must be able to modify itself. In traditional frameworks, logic is locked in compiled code or complex Python class hierarchies. Tactus takes a radical approach: **The entire agent is defined as data.**
1425
+
1426
+ By defining the agent's prompts, tools, and logic in a transparent, editable Lua DSL, Tactus makes the agent's own structure accessible to itself. This textual representation allows an agent to read, analyze, and *rewrite* its own definition, unlocking the potential for true self-evolution across all four dimensions.
1427
+
1428
+ ### 2. Production Reality: Control > Autonomy
1429
+
1430
+ While evolution is the future, reliability is the present requirement. Research into deployed systems (*[Measuring Agents in Production](https://arxiv.org/abs/2512.04123)*) shows that successful agents rely on **constrained deployment** and **human oversight**, not open-ended "magic."
1431
+
1432
+ Tactus bridges this gap. It offers the **evolutionary potential** of "Agent as Code" while enforcing the **production reliability** of a strict Lua runtime. You get:
1433
+
1434
+ * **Controllability**: Explicit loops and conditionals, not black-box planning.
1435
+ * **Human-in-the-Loop**: First-class primitives for approval and oversight.
1436
+ * **Bounded Autonomy**: The "Give an Agent a Tool" paradigm—defining capabilities and goals—within a controlled environment (see the sketch below).
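+ 
+ A minimal sketch of these three properties in the DSL is shown below. `Human.approve()` is named later in this README as a first-class primitive; the loop structure, the callable-agent form, and the argument shapes are illustrative assumptions rather than confirmed syntax.
+ 
+ ```lua
+ -- Bounded, explicit control flow: a fixed iteration budget, an agent turn,
+ -- and a human approval gate. Apart from Human.approve(), the exact call
+ -- shapes here are illustrative assumptions.
+ for i = 1, 3 do
+   local draft = researcher({task = "Summarize the findings so far"})
+ 
+   -- First-class human-in-the-loop: suspend until a human reviews the draft.
+   if Human.approve({message = "Ship this summary?", content = draft}) then
+     break
+   end
+ end
+ ```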
1437
+
1438
+ ## Related Projects
1439
+
1440
+ The AI agent space is crowded. This section explains how Tactus differs from alternatives and why you might choose it.
1441
+
1442
+ **Tactus's core differentiator**: Most frameworks embed orchestration in Python (or another host language). Tactus uses a dedicated DSL (Lua) that is token-efficient, sandboxed, and designed to be readable and modifiable by AI agents themselves. This enables self-evolution patterns where agents can inspect and rewrite their own workflow definitions—a capability that's difficult when logic is scattered across Python classes.
1443
+
1444
+ ### DSPy
1445
+
1446
+ [DSPy](https://dspy.ai) (Declarative Self-improving Python) is the engine that powers Tactus's intelligence layer. Tactus integrates DSPy directly, allowing you to define DSPy Modules, Signatures, and Optimizers within your `.tac` files using a clean Lua syntax.
1447
+
1448
+ While DSPy provides the primitives for programming with language models (optimizing prompts, few-shot examples, and reasoning steps), Tactus provides the **orchestration layer** that makes these components production-ready:
1449
+
1450
+ - **Durability**: Tactus handles checkpointing and resuming DSPy module calls transparently.
1451
+ - **Orchestration**: Tactus manages the control flow (loops, conditionals) around your DSPy modules.
1452
+ - **Human-in-the-Loop**: Tactus allows humans to inspect, approve, or correct DSPy module outputs.
1453
+ - **Sandboxing**: Tactus runs DSPy components in a safe, sandboxed environment suitable for user-contributed code.
1454
+
1455
+ You can use Tactus to define standard DSPy modules:
1456
+
1457
+ ```lua
1458
+ -- Define a DSPy Module with a typed signature
1459
+ qa = Module {
1460
+ signature = "question -> answer",
1461
+ strategy = "chain_of_thought"
1462
+ }
1463
+
1464
+ -- Invoke it as part of a durable workflow
1465
+ local result = qa({question = "How does this work?"})
1466
+ ```
1467
+
1468
+ Tactus and DSPy work together: DSPy handles the *thinking* (optimizing how to get the best answer), while Tactus handles the *doing* (ensuring the workflow completes reliably, even if it takes days).
1469
+
1470
+ | | DSPy (Python) | Tactus (Lua) |
1471
+ |---|---|---|
1472
+ | **Role** | Intelligence Engine | Orchestration Engine |
1473
+ | **Focus** | Prompt optimization, reasoning | Durability, HITL, Sandboxing |
1474
+ | **Definition** | Python classes | Lua DSL primitives |
1475
+ | **State** | In-memory | Persisted & Resumable |
1476
+ | **Optimization** | Automatic (Teleprompters) | Agent-driven or Manual |
1477
+
1478
+ ### LangGraph
1479
+
1480
+ [LangGraph](https://github.com/langchain-ai/langgraph) is LangChain's graph-based workflow engine. Like Tactus, it emphasizes explicit control flow over autonomous agent behavior—you define nodes, edges, and state transitions rather than letting agents decide what to do next.
1481
+
1482
+ The key difference is the host language. LangGraph embeds workflows in Python using a `StateGraph` API, while Tactus uses Lua. This matters for two reasons: (1) Lua is more token-efficient when included in LLM context, and (2) Lua's sandboxed execution makes it safer for AI-generated or user-contributed code. If you need agents to read, understand, and modify their own orchestration logic, a dedicated DSL is more tractable than Python class hierarchies.
1483
+
1484
+ | | LangGraph | Tactus |
1485
+ |-|-----------|--------|
1486
+ | **Orchestration language** | Python (StateGraph API) | Lua DSL |
1487
+ | **State management** | Explicit, graph-based | Explicit, imperative |
1488
+ | **HITL** | Interrupt nodes + persistent state | First-class primitives (`Human.approve()`, etc.) |
1489
+ | **Self-evolution** | Difficult (logic in Python) | Designed for it (logic in readable DSL) |
1490
+ | **Ecosystem** | LangChain integration | Standalone, uses Pydantic-AI |
1491
+
1492
+ ### CrewAI
1493
+
1494
+ [CrewAI](https://github.com/crewAIInc/crewAI) takes a role-based approach where agents are modeled as team members with specific responsibilities. You define a "crew" of agents with roles, goals, and backstories, then let them collaborate on tasks.
1495
+
1496
+ This paradigm is intuitive for certain use cases, but it imposes a specific mental model. All naming, configuration, and documentation are built around the crew/worker metaphor. If you want that structure, CrewAI provides it out of the box. If you find it constraining—or want your orchestration logic to be AI-readable without anthropomorphic abstractions—Tactus offers more flexibility.
1497
+
1498
+ CrewAI recently added "Flows" for more explicit control, narrowing the gap with graph-based frameworks. But the underlying paradigm remains role-centric rather than workflow-centric.
1499
+
1500
+ ### Vendor Frameworks
1501
+
1502
+ The major AI companies have released their own agent frameworks:
1503
+
1504
+ - **[OpenAI Agents SDK](https://openai.github.io/openai-agents-python/)** — Production evolution of OpenAI Swarm. Lightweight primitives (Agents, Handoffs, Guardrails) for multi-agent orchestration. Tightly coupled to OpenAI's ecosystem.
1505
+
1506
+ - **[Google ADK](https://google.github.io/adk-docs/)** (Agent Development Kit) — Modular framework with workflow agents (Sequential, Parallel, Loop) and LLM agents. Optimized for Gemini and Vertex AI deployment.
1507
+
1508
+ - **[Microsoft AutoGen](https://github.com/microsoft/autogen)** — Conversation-driven multi-agent framework where agents coordinate through message passing.
1509
+
1510
+ - **[Meta Llama Stack](https://ai.meta.com/blog/meta-llama-3-1/)** — Standardized interfaces for building agentic applications with Llama models. More of an API specification than a workflow framework.
1511
+
1512
+ These frameworks are valuable if you're committed to a specific vendor's ecosystem. Tactus is model-agnostic (via [DSPy](https://dspy.ai)) and designed to run anywhere—local, cloud, or AWS Lambda Durable Functions.
1513
+
1514
+ ### Other Tools
1515
+
1516
+ - **[Pydantic-AI](https://github.com/pydantic/pydantic-ai)** — Used for type-safe tool definitions and message structures.
1517
+
1518
+ - **[Guidance](https://github.com/guidance-ai/guidance)** (Microsoft) — Interleaves constrained generation with control flow. Focuses on token-level control during generation rather than workflow orchestration.
1519
+
1520
+ ## Complete Feature List
1521
+
1522
+ - **Durable Execution**: Automatic position-based checkpointing for all operations (agent turns, model predictions, sub-procedure calls, HITL interactions) with replay-based recovery—resume from exactly where you left off after crashes, timeouts, or pauses
1523
+ - **DSPy Integration**: First-class support for DSPy Modules and Signatures, enabling declarative machine learning components and prompt optimization alongside agentic workflows
1524
+ - **Model Primitive**: First-class support for ML inference (PyTorch, HTTP, HuggingFace Transformers) with automatic checkpointing—distinct from conversational agents for classification, prediction, and transformation tasks
1525
+ - **Script Mode**: Write procedures without explicit `main` definitions—top-level `input`/`output` declarations and code automatically wrapped as the main procedure
1526
+ - **State Management**: Typed, schema-validated persistent state with automatic initialization from defaults and runtime validation
1527
+ - **Explicit Checkpoints**: Manual `checkpoint()` primitive for saving state at strategic points without suspending execution
1528
+ - **Imperative Lua DSL**: Define agent workflows with full programmatic control using a token-efficient, sandboxed language designed for AI manipulation
1529
+ - **Multi-Provider Support**: Use OpenAI and AWS Bedrock models in the same workflow
1530
+ - **Multi-Model Support**: Different agents can use different models (GPT-4o, Claude, etc.)
1531
+ - **Human-in-the-Loop**: Built-in support for human approval, input, and review with automatic checkpointing
1532
+ - **Cost & Performance Tracking**: Granular tracking of costs, tokens, latency, retries, cache usage, and comprehensive metrics per agent and procedure
1533
+ - **BDD Testing**: First-class Gherkin specifications for testing agent behavior
1534
+ - **Asynchronous Execution**: Native async I/O for efficient LLM workflows
1535
+ - **Context Engineering**: Fine-grained control over conversation history per agent
1536
+ - **Typed Input/Output**: JSON Schema validation with UI generation support using `input`/`output`/`state` declarations
1537
+ - **Pluggable Backends**: Storage, HITL, and chat recording via Pydantic protocols
1538
+ - **LLM Integration**: Works with OpenAI and Bedrock via [DSPy](https://dspy.ai)
1539
+ - **Standalone CLI**: Run workflows without any infrastructure
1540
+ - **Type-Safe**: Pydantic models throughout for validation and type safety
1541
+
1542
+ **Note**: Some features from the [specification](SPECIFICATION.md) are not yet implemented, including `guards`, `dependencies`, inline procedure definitions, and advanced HITL configuration. See [IMPLEMENTATION.md](IMPLEMENTATION.md) for the complete status.
1543
+
1544
+ ## Architecture
1545
+
1546
+ Tactus is built around three core abstractions:
1547
+
1548
+ 1. **StorageBackend**: Persists procedure state and checkpoints
1549
+ 2. **HITLHandler**: Manages human-in-the-loop interactions
1550
+ 3. **ChatRecorder**: Records conversation history
1551
+
1552
+ These are defined as Pydantic protocols, allowing you to plug in any implementation:
1553
+
1554
+ ```python
1555
+ from tactus import TactusRuntime
1556
+ from tactus.adapters.memory import MemoryStorage
1557
+ from tactus.adapters.cli_hitl import CLIHITLHandler
1558
+
1559
+ runtime = TactusRuntime(
1560
+ procedure_id="my-workflow",
1561
+ storage_backend=MemoryStorage(),
1562
+ hitl_handler=CLIHITLHandler(),
1563
+ chat_recorder=None # Optional
1564
+ )
1565
+
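+ # yaml_config and context are assumed to be defined earlier (not shown);
+ # await this call from within an async function.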
1566
+ result = await runtime.execute(yaml_config, context)
1567
+ ```
1568
+
1569
+ ## CLI Commands
1570
+
1571
+ ### Running Procedures
1572
+
1573
+ ```bash
1574
+ # Run a procedure
1575
+ tactus run workflow.tac
1576
+
1577
+ # Run with parameters (supports all types)
1578
+ tactus run workflow.tac --param name="Alice" --param count=5
1579
+ tactus run workflow.tac --param enabled=true --param items='[1,2,3]'
1580
+ tactus run workflow.tac --param config='{"key":"value","nested":{"data":true}}'
1581
+
1582
+ # Interactive mode - prompts for all inputs with confirmation
1583
+ tactus run workflow.tac --interactive
1584
+
1585
+ # Missing required inputs will prompt automatically
1586
+ tactus run workflow.tac # If procedure has required inputs, you'll be prompted
1587
+
1588
+ # Use file storage (instead of memory)
1589
+ tactus run workflow.tac --storage file --storage-path ./data
1590
+ ```
1591
+
1592
+ #### Logging Options
1593
+
1594
+ The `run` command supports filtering and formatting logs:
1595
+
1596
+ ```bash
1597
+ # Show less/more output
1598
+ tactus run workflow.tac --log-level warning
1599
+ tactus run workflow.tac --log-level debug
1600
+
1601
+ # Choose a log format
1602
+ tactus run workflow.tac --log-format rich # default, grouped timestamps
1603
+ tactus run workflow.tac --log-format terminal # no timestamps, higher-signal terminal output
1604
+ tactus run workflow.tac --log-format raw # one-line-per-record, timestamped (CloudWatch-friendly)
1605
+ ```
1606
+
1607
+ The CLI automatically parses parameter types (see the declaration sketch after this list):
1608
+ - **Strings**: Direct values or quoted strings
1609
+ - **Numbers**: Integers or floats are auto-detected
1610
+ - **Booleans**: `true`, `false`, `yes`, `no`, `1`, `0`
1611
+ - **Arrays**: JSON arrays like `'[1,2,3]'` or comma-separated `"a,b,c"`
1612
+ - **Objects**: JSON objects like `'{"key":"value"}'`
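+ 
+ These parsed values feed the procedure's typed `input` declarations. The block below is a hedged sketch of what such declarations might look like; this README confirms that `input`/`output` declarations exist and that parameters carry a type, a required flag, and an optional default (compare the `tactus info` output below), but the exact syntax shown here is an assumption.
+ 
+ ```lua
+ -- Hedged sketch: declaration syntax is assumed, not confirmed. Parameter
+ -- names match the --param examples above; types mirror the CLI parsing rules.
+ input {
+   name    = { type = "string",  required = true },
+   count   = { type = "number",  default = 3 },
+   enabled = { type = "boolean", default = false },
+   items   = { type = "array" },
+   config  = { type = "object" }
+ }
+ ```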
1613
+
1614
+ When you run a procedure, you'll see real-time execution output:
1615
+
1616
+ ```
1617
+ Running procedure: workflow.tac (lua format)
1618
+
1619
+ → Agent researcher: Waiting for response...
1620
+ Hello! I'll help you with that task.
1621
+ ✓ Agent researcher: Completed 1204ms
1622
+ → Tool done {"reason": "Task completed successfully"}
1623
+ Result: Done
1624
+ $ Cost researcher: $0.001267 (354 tokens, openai:gpt-4o, 1204ms)
1625
+
1626
+ ✓ Procedure completed: 1 iterations, 1 tools used
1627
+
1628
+ $ Cost Summary
1629
+ Total Cost: $0.001267
1630
+ Total Tokens: 354
1631
+ ```
1632
+
1633
+ ### Inspecting Procedures
1634
+
1635
+ ```bash
1636
+ # View procedure metadata (agents, tools, parameters, outputs)
1637
+ tactus info workflow.tac
1638
+ ```
1639
+
1640
+ Example output:
1641
+
1642
+ ```
1643
+ Procedure info: workflow.tac
1644
+
1645
+ Parameters:
1646
+ task: string (required)
1647
+ count: number default: 3
1648
+
1649
+ Outputs:
1650
+ result: string (required) - Summary of the completed work
1651
+
1652
+ Agents:
1653
+ researcher:
1654
+ Provider: openai
1655
+ Model: gpt-4o
1656
+ Tools: search, analyze, done
1657
+ Prompt: You are a research assistant...
1658
+ ```
1659
+
1660
+ ### Validation and Testing
1661
+
1662
+ ```bash
1663
+ # Validate syntax and structure
1664
+ tactus validate workflow.tac
1665
+
1666
+ # Format a workflow file (2-space indentation + normalized spacing)
1667
+ tactus format workflow.tac
1668
+
1669
+ # Check formatting without rewriting (exit 1 if changes needed)
1670
+ tactus format workflow.tac --check
1671
+
1672
+ # Run BDD specifications
1673
+ tactus test workflow.tac
1674
+
1675
+ # Test consistency across multiple runs
1676
+ tactus test workflow.tac --runs 10
1677
+
1678
+ # Evaluate with Pydantic AI Evals
1679
+ tactus eval workflow.tac --runs 10
1680
+ ```
1681
+
1682
+ The `format` command uses Tactus's Lua parser to reindent and normalize whitespace while preserving the structure of the code.
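+ 
+ For example, a definition written on one cramped line comes back reindented with 2-space indentation and normalized spacing (illustrative output; the exact result depends on the formatter's rules):
+ 
+ ```lua
+ -- Before: qa=Module{signature="question -> answer",strategy="chain_of_thought"}
+ -- After `tactus format` (illustrative):
+ qa = Module {
+   signature = "question -> answer",
+   strategy = "chain_of_thought"
+ }
+ ```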
1683
+
1684
+ ### Understanding Output
1685
+
1686
+ The CLI displays several types of events:
1687
+
1688
+ - **→ Agent [name]**: Agent is processing (starts with → symbol)
1689
+ - **✓ Agent [name]**: Agent completed (shows duration)
1690
+ - **→ Tool [name]**: Tool was called (shows arguments and result)
1691
+ - **$ Cost [name]**: Cost breakdown (tokens, model, duration)
1692
+
1693
+ All commands that execute workflows display comprehensive cost and performance metrics.
1694
+
1695
+ ## Tactus IDE
1696
+
1697
+ Tactus includes a full-featured IDE for editing `.tac` files with instant feedback and intelligent code completion.
1698
+
1699
+ ### Features
1700
+
1701
+ - **Instant syntax validation** - TypeScript parser provides immediate feedback (< 10ms)
1702
+ - **Semantic intelligence** - Python LSP server for completions and hover info
1703
+ - **Monaco Editor** - Same editor as VS Code
1704
+ - **Hybrid validation** - Fast client-side syntax + smart backend semantics
1705
+ - **Offline capable** - Basic editing works without backend
1706
+ - **Cross-platform** - Built with Electron for desktop support
1707
+
1708
+ ### Architecture: Hybrid Validation
1709
+
1710
+ The IDE uses a two-layer validation approach for optimal performance:
1711
+
1712
+ **Layer 1: TypeScript Parser (Client-Side, Instant)**
1713
+ - Validates syntax as you type (< 10ms)
1714
+ - Works offline, no backend needed
1715
+ - Shows syntax errors immediately
1716
+ - ANTLR-generated from same grammar as Python parser
1717
+
1718
+ **Layer 2: Python LSP (Backend, Semantic)**
1719
+ - Provides intelligent completions
1720
+ - Hover documentation for agents, parameters, outputs
1721
+ - Cross-reference validation
1722
+ - Debounced (300ms) to reduce load
1723
+
1724
+ This provides the best of both worlds: zero-latency syntax checking with intelligent semantic features.
1725
+
1726
+ ### Running the IDE
1727
+
1728
+ ```bash
1729
+ # Terminal 1: Start the backend LSP server
1730
+ cd tactus-ide/backend
1731
+ pip install -r requirements.txt
1732
+ python app.py # Runs on port 5001
1733
+
1734
+ # Terminal 2: Start the IDE frontend
1735
+ cd tactus-ide/frontend
1736
+ npm install
1737
+ npm run dev # Runs on port 3000
1738
+ ```
1739
+
1740
+ Open http://localhost:3000 in your browser to use the IDE.
1741
+
1742
+ **Note**: Backend uses port 5001 (not 5000) because macOS AirPlay Receiver uses port 5000.
1743
+
1744
+ ### Validation Layers in Action
1745
+
1746
+ **Layer 1: TypeScript (Instant)**
1747
+ - Syntax errors (missing braces, parentheses)
1748
+ - Bracket matching
1749
+ - Basic structure validation
1750
+ - Works offline
1751
+
1752
+ **Layer 2: Python LSP (Semantic)**
1753
+ - Missing required fields (e.g., agent without provider)
1754
+ - Cross-reference validation (e.g., undefined agent referenced)
1755
+ - Context-aware completions
1756
+ - Hover documentation
1757
+ - Signature help
1758
+
1759
+ ## Integration
1760
+
1761
+ Tactus is designed to be integrated into larger systems. You can create custom adapters for your storage backend, HITL system, and chat recording.
1762
+
1763
+ ## Development
1764
+
1765
+ ```bash
1766
+ # Clone the repository
1767
+ git clone https://github.com/AnthusAI/Tactus.git
1768
+ cd Tactus
1769
+
1770
+ # Install with dev dependencies
1771
+ pip install -e ".[dev]"
1772
+
1773
+ # Run tests
1774
+ behave --summary # BDD integration tests
1775
+ pytest tests/ # Unit tests
1776
+
1777
+ # Run with coverage
1778
+ pytest --cov=tactus --cov-report=html
1779
+
1780
+ # See tactus/testing/README.md for detailed testing documentation
1781
+ ```
1782
+
1783
+ ### Parser Generation
1784
+
1785
+ Tactus uses ANTLR4 to generate parsers from the Lua grammar for validation.
1786
+
1787
+ **Requirements:**
1788
+ - **Docker** (required only for regenerating parsers)
1789
+ - Generated parsers are committed to repo
1790
+
1791
+ **When to regenerate:**
1792
+ - Only when modifying grammar files in `tactus/validation/grammar/`
1793
+ - Not needed for normal development
1794
+
1795
+ **How to regenerate:**
1796
+ ```bash
1797
+ # Ensure Docker is running
1798
+ make generate-parsers
1799
+
1800
+ # Or individually:
1801
+ make generate-python-parser
1802
+ make generate-typescript-parser
1803
+ ```
1804
+
1805
+ See `tactus/validation/README.md` for detailed documentation.
1806
+
1807
+ ## License
1808
+
1809
+ MIT License - see LICENSE file for details.