tactus 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,6 @@ Built-in step definitions for Tactus primitives.
3
3
 
4
4
  Provides a comprehensive library of steps for testing:
5
5
  - Tool calls
6
- - Stage transitions
7
6
  - State management
8
7
  - Procedure completion
9
8
  - Iterations and timing
@@ -14,6 +13,7 @@ Provides a comprehensive library of steps for testing:
14
13
 
15
14
  import logging
16
15
  import re
16
+ import ast
17
17
  from typing import Any
18
18
 
19
19
  from .registry import StepRegistry
@@ -22,6 +22,30 @@ from .registry import StepRegistry
22
22
  logger = logging.getLogger(__name__)
23
23
 
24
24
 
25
+ def _parse_step_string_literal(value: str) -> tuple[str, bool]:
26
+ """
27
+ Parse an optional quoted string literal from a step capture group.
28
+
29
+ Supports single-quoted or double-quoted Python-style escapes, e.g.:
30
+ "Hello! I'm World"
31
+ 'He said: "hi"'
32
+ "Line 1\\nLine 2"
33
+
34
+ Returns:
35
+ (parsed_value, was_quoted)
36
+ """
37
+ stripped = value.strip()
38
+ if len(stripped) >= 2 and stripped[0] in {"'", '"'} and stripped[-1] == stripped[0]:
39
+ try:
40
+ parsed = ast.literal_eval(stripped)
41
+ if isinstance(parsed, str):
42
+ return parsed, True
43
+ except Exception:
44
+ # Fall back to raw string if the literal is malformed.
45
+ return stripped, True
46
+ return value, False
47
+
48
+
25
49
  def register_builtin_steps(registry: StepRegistry) -> None:
26
50
  """
27
51
  Register all built-in step definitions.
@@ -32,9 +56,6 @@ def register_builtin_steps(registry: StepRegistry) -> None:
32
56
  # Tool-related steps
33
57
  register_tool_steps(registry)
34
58
 
35
- # Stage-related steps
36
- register_stage_steps(registry)
37
-
38
59
  # State-related steps
39
60
  register_state_steps(registry)
40
61
 
@@ -69,34 +90,40 @@ def register_builtin_steps(registry: StepRegistry) -> None:
69
90
  def register_tool_steps(registry: StepRegistry) -> None:
70
91
  """Register tool-related step definitions."""
71
92
 
72
- registry.register(r"the (?P<tool>\w+) tool should be called", step_tool_called)
93
+ registry.register(r"the (?P<tool>[-\w]+) tool should be called", step_tool_called)
73
94
 
74
- registry.register(r"the (?P<tool>\w+) tool should not be called", step_tool_not_called)
95
+ registry.register(r"the (?P<tool>[-\w]+) tool should not be called", step_tool_not_called)
75
96
 
76
97
  registry.register(
77
- r"the (?P<tool>\w+) tool should be called at least (?P<n>\d+) time",
98
+ r"the (?P<tool>[-\w]+) tool should be called at least (?P<n>\d+) time",
78
99
  step_tool_called_at_least,
79
100
  )
80
101
 
81
102
  registry.register(
82
- r"the (?P<tool>\w+) tool should be called at least (?P<n>\d+) times",
103
+ r"the (?P<tool>[-\w]+) tool should be called at least (?P<n>\d+) times",
83
104
  step_tool_called_at_least,
84
105
  )
85
106
 
86
107
  registry.register(
87
- r"the (?P<tool>\w+) tool should be called exactly (?P<n>\d+) time", step_tool_called_exactly
108
+ r"the (?P<tool>[-\w]+) tool should be called exactly (?P<n>\d+) time",
109
+ step_tool_called_exactly,
88
110
  )
89
111
 
90
112
  registry.register(
91
- r"the (?P<tool>\w+) tool should be called exactly (?P<n>\d+) times",
113
+ r"the (?P<tool>[-\w]+) tool should be called exactly (?P<n>\d+) times",
92
114
  step_tool_called_exactly,
93
115
  )
94
116
 
95
117
  registry.register(
96
- r"the (?P<tool>\w+) tool should be called with (?P<param>\w+)=(?P<value>.+)",
118
+ r"the (?P<tool>[-\w]+) tool should be called with (?P<param>\w+)=(?P<value>.+)",
97
119
  step_tool_called_with_param,
98
120
  )
99
121
 
122
+ registry.register(
123
+ r'the tool "(?P<tool>[-\w]+)" returns (?P<value>.+)',
124
+ step_mock_tool_returns,
125
+ )
126
+
100
127
 
101
128
  def step_tool_called(context: Any, tool: str) -> None:
102
129
  """Check if a tool was called."""
@@ -132,24 +159,20 @@ def step_tool_called_with_param(context: Any, tool: str, param: str, value: str)
132
159
  assert found, f"Tool '{tool}' was not called with {param}={value}"
133
160
 
134
161
 
135
- # Stage-related steps
136
-
137
-
138
- def register_stage_steps(registry: StepRegistry) -> None:
139
- """Register stage-related step definitions."""
140
-
141
- registry.register(r"the procedure has started", step_procedure_started)
142
-
143
- registry.register(r"the stage is (?P<stage>\w+)", step_stage_is)
144
-
145
- registry.register(r"the stage should be (?P<stage>\w+)", step_stage_is)
162
+ def step_mock_tool_returns(context: Any, tool: str, value: str) -> None:
163
+ """Configure a runtime tool mock response for this scenario."""
164
+ parsed_value, was_quoted = _parse_step_string_literal(value)
165
+ if not was_quoted:
166
+ try:
167
+ parsed_value = ast.literal_eval(parsed_value)
168
+ except Exception:
169
+ # Treat unquoted values as plain strings (e.g., positive/neutral)
170
+ pass
146
171
 
147
- registry.register(
148
- r"the stage should transition from (?P<from_stage>\w+) to (?P<to_stage>\w+)",
149
- step_stage_transition,
150
- )
172
+ if not hasattr(context, "mock_tool_returns"):
173
+ raise AssertionError("Context does not support tool mocking")
151
174
 
152
- registry.register(r"we are in stage (?P<stage>\w+)", step_in_stage)
175
+ context.mock_tool_returns(tool, parsed_value)
153
176
 
154
177
 
155
178
  def step_procedure_started(context: Any) -> None:
@@ -159,32 +182,6 @@ def step_procedure_started(context: Any) -> None:
159
182
  assert context is not None, "Test context not initialized"
160
183
 
161
184
 
162
- def step_stage_is(context: Any, stage: str) -> None:
163
- """Check if current stage matches expected."""
164
- current = context.current_stage()
165
- assert current == stage, f"Expected stage '{stage}', but current stage is '{current}'"
166
-
167
-
168
- def step_stage_transition(context: Any, from_stage: str, to_stage: str) -> None:
169
- """Check if stage transition occurred."""
170
- history = context.stage_history()
171
-
172
- # Build list of transitions
173
- transitions = [(history[i], history[i + 1]) for i in range(len(history) - 1)]
174
-
175
- expected_transition = (from_stage, to_stage)
176
- assert expected_transition in transitions, (
177
- f"Stage transition from '{from_stage}' to '{to_stage}' did not occur. "
178
- f"Actual transitions: {transitions}"
179
- )
180
-
181
-
182
- def step_in_stage(context: Any, stage: str) -> None:
183
- """Check if currently in specified stage."""
184
- current = context.current_stage()
185
- assert current == stage, f"Not in stage '{stage}', current stage is '{current}'"
186
-
187
-
188
185
  # State-related steps
189
186
 
190
187
 
@@ -201,8 +198,12 @@ def register_state_steps(registry: StepRegistry) -> None:
201
198
def step_state_equals(context: Any, key: str, value: str) -> None:
    """Check if state value equals expected.

    The state value is read with ``context.state_get(key)`` and compared as a
    string (``None`` is rendered as ``"None"``). A quoted expectation is first
    decoded by ``_parse_step_string_literal``; quoted and unquoted
    expectations are then compared in exactly the same way.

    Args:
        context: Test context exposing ``state_get``.
        key: Name of the state entry to inspect.
        value: Expected value as captured from the step text.

    Raises:
        AssertionError: If the stringified state value differs from the
            expected text.
    """
    actual = context.state_get(key)
    # Only the decoded text matters here; whether it was quoted does not
    # change how the comparison is performed.
    expected, _ = _parse_step_string_literal(value)
    actual_str = str(actual) if actual is not None else "None"
    assert actual_str == expected, f"State '{key}' is '{actual_str}', expected '{expected}'"
207
208
 
208
209
 
@@ -224,6 +225,14 @@ def step_state_contains(context: Any, key: str) -> None:
224
225
  def register_output_steps(registry: StepRegistry) -> None:
225
226
  """Register output-related step definitions."""
226
227
 
228
+ registry.register(r"the output should exist", step_output_value_exists)
229
+ registry.register(r"the output should be (?P<value>.+)", step_output_value_equals)
230
+ registry.register(
231
+ r"the output should fuzzy match (?P<value>.+) with threshold (?P<threshold>[0-9]*\.?[0-9]+)",
232
+ step_output_value_fuzzy_match,
233
+ )
234
+ registry.register(r"the output should fuzzy match (?P<value>.+)", step_output_value_fuzzy_match)
235
+
227
236
  registry.register(r"the output (?P<key>\w+) should be (?P<value>.+)", step_output_equals)
228
237
 
229
238
  registry.register(
@@ -238,6 +247,11 @@ def register_output_steps(registry: StepRegistry) -> None:
238
247
  def step_output_equals(context: Any, key: str, value: str) -> None:
239
248
  """Check if output value equals expected."""
240
249
  actual = context.output_get(key)
250
+ value, was_quoted = _parse_step_string_literal(value)
251
+ if was_quoted:
252
+ actual_str = str(actual) if actual is not None else "None"
253
+ assert actual_str == value, f"Output '{key}' is '{actual_str}', expected '{value}'"
254
+ return
241
255
 
242
256
  # Handle boolean comparison specially
243
257
  if value.lower() in ("true", "false"):
@@ -266,9 +280,132 @@ def step_output_equals(context: Any, key: str, value: str) -> None:
266
280
  assert actual_str == value, f"Output '{key}' is '{actual_str}', expected '{value}'"
267
281
 
268
282
 
283
def step_output_value_exists(context: Any) -> None:
    """Assert that ``context.output_value()`` returns something other than None."""
    assert context.output_value() is not None, "Output is missing"
287
+
288
+
289
def step_output_value_equals(context: Any, value: str) -> None:
    """Assert that the scalar output matches the expected step value.

    Comparison strategy, in order:
      1. Quoted expectation: decoded and compared as text.
      2. ``true`` / ``false`` (any case): compared as a boolean when the
         output is a bool, otherwise as lower-cased text.
      3. Numeric expectation: compared numerically when the output is, or
         converts to, a number.
      4. Anything else: compared as text (``None`` renders as ``"None"``).

    Raises:
        AssertionError: If the output does not match the expectation.
    """

    def _as_text(obj: Any) -> str:
        return "None" if obj is None else str(obj)

    actual = context.output_value()
    expected, quoted = _parse_step_string_literal(value)

    if quoted:
        text = _as_text(actual)
        assert text == expected, f"Output is '{text}', expected '{expected}'"
        return

    lowered = expected.lower()
    if lowered in ("true", "false"):
        if isinstance(actual, bool):
            want = lowered == "true"
            assert actual == want, f"Output is {actual}, expected {want}"
        else:
            # _as_text(None).lower() is "none", matching the textual fallback.
            assert _as_text(actual).lower() == lowered, (
                f"Output is '{actual}', expected '{expected}'"
            )
        return

    try:
        expected_num = float(expected)
        already_numeric = isinstance(actual, (int, float))
        actual_num = actual if already_numeric else float(actual)
    except (ValueError, TypeError):
        # Either side is not numeric; fall through to the text comparison.
        pass
    else:
        if already_numeric:
            assert actual_num == expected_num, f"Output is {actual}, expected {expected_num}"
        else:
            assert actual_num == expected_num, f"Output is '{actual}', expected {expected_num}"
        return

    text = _as_text(actual)
    assert text == expected, f"Output is '{text}', expected '{expected}'"
322
+
323
+
324
def step_output_value_fuzzy_match(context: Any, value: str, threshold: str = "0.8") -> None:
    """Check if scalar output is similar to expected value above a threshold.

    This is a deterministic, non-LLM fuzzy match based on string similarity
    (``difflib.SequenceMatcher``).

    Default behavior:
    - Case-insensitive (compares lowercased text)
    - Punctuation-insensitive (strips punctuation)
    - If one normalized text contains the other (and neither is empty), the
      similarity is treated as 1.0

    Multi-match syntax (best-effort):
      Then the output should fuzzy match any of ["Hello", "Hi", "Hey"] with threshold 0.9

    Args:
        context: Test context exposing ``output_value()``.
        value: Expected text, optionally quoted, or an ``any of ...`` list.
        threshold: Minimum similarity ratio, as text convertible to float.

    Raises:
        AssertionError: If the output is None, the threshold is not a number,
            no expected values could be extracted, or the best similarity is
            below the threshold.
    """
    import difflib

    def _normalize_text(text: str) -> str:
        # Lowercase + strip punctuation + collapse whitespace.
        without_punct = re.sub(r"[^\w\s]", "", text.lower())
        return re.sub(r"\s+", " ", without_punct).strip()

    actual = context.output_value()
    assert actual is not None, "Output is missing"

    try:
        threshold_f = float(threshold)
    except (TypeError, ValueError) as exc:
        raise AssertionError(f"Invalid threshold: {threshold}") from exc

    expected_raw, was_quoted = _parse_step_string_literal(value)
    if not was_quoted:
        expected_raw = expected_raw.strip()

    expected_values: list[str]
    prefix = "any of "
    if expected_raw.lower().startswith(prefix):
        values_str = expected_raw[len(prefix):].strip()
        try:
            parsed = ast.literal_eval(values_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            parsed = None

        if isinstance(parsed, (list, tuple)):
            expected_values = [item if isinstance(item, str) else str(item) for item in parsed]
        else:
            # Not a list/tuple literal: treat it as a comma-separated list of
            # optionally quoted items.
            expected_values = [
                _parse_step_string_literal(part.strip())[0]
                for part in values_str.split(",")
                if part.strip()
            ]

        if not expected_values:
            raise AssertionError(f"No expected values provided: {value}")
    else:
        expected_values = [expected_raw]

    actual_norm = _normalize_text(str(actual))
    best_ratio = -1.0
    best_expected = None

    for expected in expected_values:
        expected_norm = _normalize_text(expected)
        # Both sides must be non-empty: "" is a substring of every string, so
        # an empty (or punctuation-only) output would otherwise score 1.0
        # against any expectation.
        contained = bool(actual_norm and expected_norm) and (
            expected_norm in actual_norm or actual_norm in expected_norm
        )
        if contained:
            ratio = 1.0
        else:
            ratio = difflib.SequenceMatcher(None, actual_norm, expected_norm).ratio()

        if ratio > best_ratio:
            best_ratio = ratio
            best_expected = expected

    assert best_ratio >= threshold_f, (
        f"Output similarity is {best_ratio:.3f} (threshold {threshold_f:.3f}). "
        f"Output is '{actual}', best match was '{best_expected}'. "
        f"Expected: {expected_values}"
    )
399
+
400
+
269
401
  def step_output_not_equals(context: Any, key: str, value: str) -> None:
270
402
  """Check if output value does not equal the specified value."""
271
403
  actual = context.output_get(key)
404
+ value, was_quoted = _parse_step_string_literal(value)
405
+ if was_quoted:
406
+ actual_str = str(actual) if actual is not None else "None"
407
+ assert actual_str != value, f"Output '{key}' is '{actual_str}', should not be '{value}'"
408
+ return
272
409
 
273
410
  # Handle boolean comparison specially
274
411
  if value.lower() in ("true", "false"):
@@ -319,6 +456,7 @@ def step_output_contains(context: Any, key: str) -> None:
319
456
  def register_completion_steps(registry: StepRegistry) -> None:
320
457
  """Register completion-related step definitions."""
321
458
 
459
+ registry.register(r"the procedure has started", step_procedure_started)
322
460
  registry.register(r"the procedure should complete successfully", step_procedure_completes)
323
461
 
324
462
  registry.register(r"the procedure should fail", step_procedure_fails)
@@ -478,6 +616,23 @@ def register_agent_steps(registry: StepRegistry) -> None:
478
616
 
479
617
  registry.register(r"the (?P<agent>\w+) agent takes turns", step_agent_takes_turn)
480
618
 
619
+ registry.register(
620
+ r'the agent "(?P<agent>[^"]+)" responds with (?P<message>.+)',
621
+ step_mock_agent_responds_with,
622
+ )
623
+
624
+ registry.register(
625
+ r'the agent "(?P<agent>[^"]+)" calls tool "(?P<tool>[^"]+)" with args (?P<args>.+)',
626
+ step_mock_agent_calls_tool_with_args,
627
+ )
628
+
629
+ registry.register(
630
+ r'the agent "(?P<agent>[^"]+)" returns data (?P<data>.+)',
631
+ step_mock_agent_returns_data,
632
+ )
633
+
634
+ registry.register(r"the message is (?P<message>.+)", step_set_scenario_message)
635
+
481
636
  registry.register(r"the procedure run", step_procedure_runs)
482
637
 
483
638
  registry.register(r"the procedure runs", step_procedure_runs)
@@ -490,6 +645,61 @@ def step_agent_takes_turn(context: Any, agent: str) -> None:
490
645
  context.run_procedure()
491
646
 
492
647
 
648
def step_mock_agent_responds_with(
    context: Any, agent: str, message: str, when_message: str | None = None
) -> None:
    """Register a mocked agent response for the current scenario.

    ``message`` and, when given, ``when_message`` are decoded with
    ``_parse_step_string_literal`` and forwarded to
    ``context.mock_agent_response``.

    Raises:
        AssertionError: If the context has no ``mock_agent_response``.
    """
    reply_text = _parse_step_string_literal(message)[0]
    trigger = None if when_message is None else _parse_step_string_literal(when_message)[0]

    if not hasattr(context, "mock_agent_response"):
        raise AssertionError("Context does not support agent mocking")

    context.mock_agent_response(agent, reply_text, when_message=trigger)
659
+
660
+
661
def step_set_scenario_message(context: Any, message: str) -> None:
    """Store the scenario's primary message on the context.

    The message is decoded with ``_parse_step_string_literal`` and passed to
    ``context.set_scenario_message``.

    Raises:
        AssertionError: If the context has no ``set_scenario_message``.
    """
    decoded = _parse_step_string_literal(message)[0]

    if not hasattr(context, "set_scenario_message"):
        raise AssertionError("Context does not support scenario message")

    context.set_scenario_message(decoded)
667
+
668
+
669
def step_mock_agent_calls_tool_with_args(context: Any, agent: str, tool: str, args: str) -> None:
    """Configure a per-scenario mocked agent tool call.

    ``args`` is decoded with ``_parse_step_string_literal`` and then evaluated
    as a Python literal, which must be a dict. The result is passed to
    ``context.mock_agent_tool_call(agent, tool, parsed_args)``.

    Args:
        context: Test context; must expose ``mock_agent_tool_call``.
        agent: Agent name captured from the step.
        tool: Tool name captured from the step.
        args: Literal text describing the tool arguments.

    Raises:
        AssertionError: If ``args`` is not a valid literal, is not a dict, or
            the context does not support agent tool call mocking.
    """
    args_str, _ = _parse_step_string_literal(args)
    try:
        parsed_args = ast.literal_eval(args_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as exc:
        # Keep the underlying parse error attached so the failing step is
        # easy to diagnose.
        raise AssertionError(f"Invalid tool args literal: {args}") from exc

    if not isinstance(parsed_args, dict):
        raise AssertionError(f"Tool args must be an object/dict, got {type(parsed_args).__name__}")

    if not hasattr(context, "mock_agent_tool_call"):
        raise AssertionError("Context does not support agent tool call mocking")

    context.mock_agent_tool_call(agent, tool, parsed_args)
684
+
685
+
686
def step_mock_agent_returns_data(context: Any, agent: str, data: str) -> None:
    """Configure structured mock data for an agent.

    ``data`` is decoded with ``_parse_step_string_literal`` and then evaluated
    as a Python literal, which must be a dict. The result is passed to
    ``context.mock_agent_data(agent, parsed)``.

    Args:
        context: Test context; must expose ``mock_agent_data``.
        agent: Agent name captured from the step.
        data: Literal text describing the data.

    Raises:
        AssertionError: If ``data`` is not a valid literal, is not a dict, or
            the context does not support agent data mocking.
    """
    data_str, _ = _parse_step_string_literal(data)
    try:
        parsed = ast.literal_eval(data_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as exc:
        # Keep the underlying parse error attached so the failing step is
        # easy to diagnose.
        raise AssertionError(f"Invalid data literal: {data}") from exc

    if not isinstance(parsed, dict):
        raise AssertionError(f"Data must be an object/dict, got {type(parsed).__name__}")

    if not hasattr(context, "mock_agent_data"):
        raise AssertionError("Context does not support agent data mocking")

    context.mock_agent_data(agent, parsed)
701
+
702
+
493
703
  def step_procedure_runs(context: Any) -> None:
494
704
  """Execute the procedure.
495
705
 
@@ -532,7 +742,7 @@ def register_regex_steps(registry: StepRegistry) -> None:
532
742
 
533
743
  # Tool argument regex matching
534
744
  registry.register(
535
- r'the (?P<tool>\w+) tool should be called with (?P<param>\w+) matching pattern "(?P<pattern>.+)"',
745
+ r'the (?P<tool>[-\w]+) tool should be called with (?P<param>\w+) matching pattern "(?P<pattern>.+)"',
536
746
  step_tool_arg_matches_pattern,
537
747
  )
538
748
 
@@ -46,6 +46,8 @@ class TactusTestRunner:
46
46
  procedure_file: Path,
47
47
  mock_tools: Optional[Dict] = None,
48
48
  params: Optional[Dict] = None,
49
+ mcp_servers: Optional[Dict] = None,
50
+ tool_paths: Optional[List[str]] = None,
49
51
  mocked: bool = False,
50
52
  ):
51
53
  if not BEHAVE_AVAILABLE:
@@ -54,6 +56,8 @@ class TactusTestRunner:
54
56
  self.procedure_file = procedure_file
55
57
  self.mock_tools = mock_tools or {}
56
58
  self.params = params or {}
59
+ self.mcp_servers = mcp_servers or {}
60
+ self.tool_paths = tool_paths or []
57
61
  self.mocked = mocked # Whether to use mocked dependencies
58
62
  self.work_dir: Optional[Path] = None
59
63
  self.parsed_feature: Optional[ParsedFeature] = None
@@ -83,6 +87,8 @@ class TactusTestRunner:
83
87
  self.procedure_file,
84
88
  mock_tools=self.mock_tools,
85
89
  params=self.params,
90
+ mcp_servers=self.mcp_servers,
91
+ tool_paths=self.tool_paths,
86
92
  mocked=self.mocked,
87
93
  )
88
94
 
@@ -31,7 +31,6 @@ class TactusDSLVisitor(LuaParserVisitor):
31
31
  "Procedure", # CamelCase
32
32
  "Prompt", # CamelCase
33
33
  "Hitl", # CamelCase
34
- "Stages", # CamelCase
35
34
  "Specification", # CamelCase
36
35
  "Specifications", # CamelCase - Gherkin BDD specs
37
36
  "Step", # CamelCase - Custom step definitions
@@ -467,17 +466,18 @@ class TactusDSLVisitor(LuaParserVisitor):
467
466
  elif func_name == "Hitl": # CamelCase
468
467
  if args and len(args) >= 2:
469
468
  self.builder.register_hitl(args[0], args[1] if isinstance(args[1], dict) else {})
470
- elif func_name == "Stages": # CamelCase
471
- if args:
472
- # Stages() can take multiple string arguments
473
- self.builder.set_stages(args)
474
469
  elif func_name == "Specification": # CamelCase
475
- if args and len(args) >= 2:
470
+ # Either:
471
+ # - Specification([[ Gherkin text ]]) (alias for Specifications)
472
+ # - Specification("name", { ... }) (structured form)
473
+ if args and len(args) == 1:
474
+ self.builder.register_specifications(args[0])
475
+ elif args and len(args) >= 2:
476
476
  self.builder.register_specification(
477
477
  args[0], args[1] if isinstance(args[1], list) else []
478
478
  )
479
479
  elif func_name == "Specifications": # CamelCase
480
- # Specifications([[ Gherkin text ]])
480
+ # Specifications([[ Gherkin text ]]) (plural form; singular is Specification([[...]]))
481
481
  if args and len(args) >= 1:
482
482
  self.builder.register_specifications(args[0])
483
483
  elif func_name == "Step": # CamelCase
@@ -485,11 +485,19 @@ class TactusDSLVisitor(LuaParserVisitor):
485
485
  if args and len(args) >= 2:
486
486
  self.builder.register_custom_step(args[0], args[1])
487
487
  elif func_name == "Evaluation": # CamelCase
488
- # Evaluation({ runs = 10, parallel = true })
489
- if args and len(args) >= 1:
490
- self.builder.set_evaluation_config(args[0] if isinstance(args[0], dict) else {})
488
+ # Either:
489
+ # - Evaluation({ runs = 10, parallel = true }) (simple config)
490
+ # - Evaluation({ dataset = {...}, evaluators = {...}, ... }) (alias for Evaluations)
491
+ if args and len(args) >= 1 and isinstance(args[0], dict):
492
+ cfg = args[0]
493
+ if any(k in cfg for k in ("dataset", "dataset_file", "evaluators", "thresholds")):
494
+ self.builder.register_evaluations(cfg)
495
+ else:
496
+ self.builder.set_evaluation_config(cfg)
497
+ elif args and len(args) >= 1:
498
+ self.builder.set_evaluation_config({})
491
499
  elif func_name == "Evaluations": # CamelCase
492
- # Evaluations({ dataset = {...}, evaluators = {...} })
500
+ # Evaluation(s)({ dataset = {...}, evaluators = {...} })
493
501
  if args and len(args) >= 1:
494
502
  self.builder.register_evaluations(args[0] if isinstance(args[0], dict) else {})
495
503
  elif func_name == "default_provider":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tactus
3
- Version: 0.30.0
3
+ Version: 0.31.1
4
4
  Summary: Tactus: Lua-based DSL for agentic workflows
5
5
  Project-URL: Homepage, https://github.com/AnthusAI/Tactus
6
6
  Project-URL: Documentation, https://github.com/AnthusAI/Tactus/tree/main/docs
@@ -170,7 +170,7 @@ Procedure {
170
170
  end
171
171
  }
172
172
 
173
- Specifications([[
173
+ Specification([[
174
174
  Feature: Research
175
175
  Scenario: Completes research
176
176
  When the researcher agent takes turns
@@ -421,12 +421,12 @@ calculator = Agent {
421
421
  done = tactus.done
422
422
 
423
423
  text_processor = Agent {
424
- tools = {
425
- done,
424
+ inline_tools = {
426
425
  {name = "uppercase", input = {...}, handler = function(args)
427
426
  return string.upper(args.text)
428
427
  end}
429
- }
428
+ },
429
+ tools = {done}
430
430
  }
431
431
  ```
432
432
 
@@ -533,7 +533,7 @@ Procedure {
533
533
  end
534
534
  }
535
535
 
536
- Specifications([[
536
+ Specification([[
537
537
  Feature: Greeting
538
538
  Scenario: Agent greets and completes
539
539
  When the greeter agent takes turns
@@ -616,7 +616,7 @@ This creates a rhythm: **tool call → summarization → tool call → summariza
616
616
 
617
617
  **Why this matters:**
618
618
 
619
- Without per-call control, an agent might call another tool when you just want it to explain the previous result. By temporarily restricting tools to an empty set (`tools = {}`), you ensure the agent focuses on summarization.
619
+ Without per-call control, an agent might call another tool when you just want it to explain the previous result. By temporarily restricting toolsets to an empty set (`tools = {}`), you ensure the agent focuses on summarization.
620
620
 
621
621
  **Other per-call overrides:**
622
622
 
@@ -731,10 +731,9 @@ Then the search tool should be called exactly 2 times
731
731
  Then the search tool should be called with query=test
732
732
  ```
733
733
 
734
- **State & Stage Steps:**
734
+ **State Steps:**
735
735
  ```gherkin
736
736
  Given the procedure has started
737
- Then the stage should be processing
738
737
  Then the state count should be 5
739
738
  Then the state error should exist
740
739
  ```
@@ -866,10 +865,9 @@ Then the search tool should be called exactly 2 times
866
865
  Then the search tool should be called with query=test
867
866
  ```
868
867
 
869
- **State & Stage Steps:**
868
+ **State Steps:**
870
869
  ```gherkin
871
870
  Given the procedure has started
872
- Then the stage should be processing
873
871
  Then the state count should be 5
874
872
  Then the state error should exist
875
873
  ```