tactus 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. tactus/__init__.py +49 -0
  2. tactus/adapters/__init__.py +9 -0
  3. tactus/adapters/broker_log.py +76 -0
  4. tactus/adapters/cli_hitl.py +189 -0
  5. tactus/adapters/cli_log.py +223 -0
  6. tactus/adapters/cost_collector_log.py +56 -0
  7. tactus/adapters/file_storage.py +367 -0
  8. tactus/adapters/http_callback_log.py +109 -0
  9. tactus/adapters/ide_log.py +71 -0
  10. tactus/adapters/lua_tools.py +336 -0
  11. tactus/adapters/mcp.py +289 -0
  12. tactus/adapters/mcp_manager.py +196 -0
  13. tactus/adapters/memory.py +53 -0
  14. tactus/adapters/plugins.py +419 -0
  15. tactus/backends/http_backend.py +58 -0
  16. tactus/backends/model_backend.py +35 -0
  17. tactus/backends/pytorch_backend.py +110 -0
  18. tactus/broker/__init__.py +12 -0
  19. tactus/broker/client.py +247 -0
  20. tactus/broker/protocol.py +183 -0
  21. tactus/broker/server.py +1123 -0
  22. tactus/broker/stdio.py +12 -0
  23. tactus/cli/__init__.py +7 -0
  24. tactus/cli/app.py +2245 -0
  25. tactus/cli/commands/__init__.py +0 -0
  26. tactus/core/__init__.py +32 -0
  27. tactus/core/config_manager.py +790 -0
  28. tactus/core/dependencies/__init__.py +14 -0
  29. tactus/core/dependencies/registry.py +180 -0
  30. tactus/core/dsl_stubs.py +2117 -0
  31. tactus/core/exceptions.py +66 -0
  32. tactus/core/execution_context.py +480 -0
  33. tactus/core/lua_sandbox.py +508 -0
  34. tactus/core/message_history_manager.py +236 -0
  35. tactus/core/mocking.py +286 -0
  36. tactus/core/output_validator.py +291 -0
  37. tactus/core/registry.py +499 -0
  38. tactus/core/runtime.py +2907 -0
  39. tactus/core/template_resolver.py +142 -0
  40. tactus/core/yaml_parser.py +301 -0
  41. tactus/docker/Dockerfile +61 -0
  42. tactus/docker/entrypoint.sh +69 -0
  43. tactus/dspy/__init__.py +39 -0
  44. tactus/dspy/agent.py +1144 -0
  45. tactus/dspy/broker_lm.py +181 -0
  46. tactus/dspy/config.py +212 -0
  47. tactus/dspy/history.py +196 -0
  48. tactus/dspy/module.py +405 -0
  49. tactus/dspy/prediction.py +318 -0
  50. tactus/dspy/signature.py +185 -0
  51. tactus/formatting/__init__.py +7 -0
  52. tactus/formatting/formatter.py +437 -0
  53. tactus/ide/__init__.py +9 -0
  54. tactus/ide/coding_assistant.py +343 -0
  55. tactus/ide/server.py +2223 -0
  56. tactus/primitives/__init__.py +49 -0
  57. tactus/primitives/control.py +168 -0
  58. tactus/primitives/file.py +229 -0
  59. tactus/primitives/handles.py +378 -0
  60. tactus/primitives/host.py +94 -0
  61. tactus/primitives/human.py +342 -0
  62. tactus/primitives/json.py +189 -0
  63. tactus/primitives/log.py +187 -0
  64. tactus/primitives/message_history.py +157 -0
  65. tactus/primitives/model.py +163 -0
  66. tactus/primitives/procedure.py +564 -0
  67. tactus/primitives/procedure_callable.py +318 -0
  68. tactus/primitives/retry.py +155 -0
  69. tactus/primitives/session.py +152 -0
  70. tactus/primitives/state.py +182 -0
  71. tactus/primitives/step.py +209 -0
  72. tactus/primitives/system.py +93 -0
  73. tactus/primitives/tool.py +375 -0
  74. tactus/primitives/tool_handle.py +279 -0
  75. tactus/primitives/toolset.py +229 -0
  76. tactus/protocols/__init__.py +38 -0
  77. tactus/protocols/chat_recorder.py +81 -0
  78. tactus/protocols/config.py +97 -0
  79. tactus/protocols/cost.py +31 -0
  80. tactus/protocols/hitl.py +71 -0
  81. tactus/protocols/log_handler.py +27 -0
  82. tactus/protocols/models.py +355 -0
  83. tactus/protocols/result.py +33 -0
  84. tactus/protocols/storage.py +90 -0
  85. tactus/providers/__init__.py +13 -0
  86. tactus/providers/base.py +92 -0
  87. tactus/providers/bedrock.py +117 -0
  88. tactus/providers/google.py +105 -0
  89. tactus/providers/openai.py +98 -0
  90. tactus/sandbox/__init__.py +63 -0
  91. tactus/sandbox/config.py +171 -0
  92. tactus/sandbox/container_runner.py +1099 -0
  93. tactus/sandbox/docker_manager.py +433 -0
  94. tactus/sandbox/entrypoint.py +227 -0
  95. tactus/sandbox/protocol.py +213 -0
  96. tactus/stdlib/__init__.py +10 -0
  97. tactus/stdlib/io/__init__.py +13 -0
  98. tactus/stdlib/io/csv.py +88 -0
  99. tactus/stdlib/io/excel.py +136 -0
  100. tactus/stdlib/io/file.py +90 -0
  101. tactus/stdlib/io/fs.py +154 -0
  102. tactus/stdlib/io/hdf5.py +121 -0
  103. tactus/stdlib/io/json.py +109 -0
  104. tactus/stdlib/io/parquet.py +83 -0
  105. tactus/stdlib/io/tsv.py +88 -0
  106. tactus/stdlib/loader.py +274 -0
  107. tactus/stdlib/tac/tactus/tools/done.tac +33 -0
  108. tactus/stdlib/tac/tactus/tools/log.tac +50 -0
  109. tactus/testing/README.md +273 -0
  110. tactus/testing/__init__.py +61 -0
  111. tactus/testing/behave_integration.py +380 -0
  112. tactus/testing/context.py +486 -0
  113. tactus/testing/eval_models.py +114 -0
  114. tactus/testing/evaluation_runner.py +222 -0
  115. tactus/testing/evaluators.py +634 -0
  116. tactus/testing/events.py +94 -0
  117. tactus/testing/gherkin_parser.py +134 -0
  118. tactus/testing/mock_agent.py +315 -0
  119. tactus/testing/mock_dependencies.py +234 -0
  120. tactus/testing/mock_hitl.py +171 -0
  121. tactus/testing/mock_registry.py +168 -0
  122. tactus/testing/mock_tools.py +133 -0
  123. tactus/testing/models.py +115 -0
  124. tactus/testing/pydantic_eval_runner.py +508 -0
  125. tactus/testing/steps/__init__.py +13 -0
  126. tactus/testing/steps/builtin.py +902 -0
  127. tactus/testing/steps/custom.py +69 -0
  128. tactus/testing/steps/registry.py +68 -0
  129. tactus/testing/test_runner.py +489 -0
  130. tactus/tracing/__init__.py +5 -0
  131. tactus/tracing/trace_manager.py +417 -0
  132. tactus/utils/__init__.py +1 -0
  133. tactus/utils/cost_calculator.py +72 -0
  134. tactus/utils/model_pricing.py +132 -0
  135. tactus/utils/safe_file_library.py +502 -0
  136. tactus/utils/safe_libraries.py +234 -0
  137. tactus/validation/LuaLexerBase.py +66 -0
  138. tactus/validation/LuaParserBase.py +23 -0
  139. tactus/validation/README.md +224 -0
  140. tactus/validation/__init__.py +7 -0
  141. tactus/validation/error_listener.py +21 -0
  142. tactus/validation/generated/LuaLexer.interp +231 -0
  143. tactus/validation/generated/LuaLexer.py +5548 -0
  144. tactus/validation/generated/LuaLexer.tokens +124 -0
  145. tactus/validation/generated/LuaLexerBase.py +66 -0
  146. tactus/validation/generated/LuaParser.interp +173 -0
  147. tactus/validation/generated/LuaParser.py +6439 -0
  148. tactus/validation/generated/LuaParser.tokens +124 -0
  149. tactus/validation/generated/LuaParserBase.py +23 -0
  150. tactus/validation/generated/LuaParserVisitor.py +118 -0
  151. tactus/validation/generated/__init__.py +7 -0
  152. tactus/validation/grammar/LuaLexer.g4 +123 -0
  153. tactus/validation/grammar/LuaParser.g4 +178 -0
  154. tactus/validation/semantic_visitor.py +817 -0
  155. tactus/validation/validator.py +157 -0
  156. tactus-0.31.0.dist-info/METADATA +1809 -0
  157. tactus-0.31.0.dist-info/RECORD +160 -0
  158. tactus-0.31.0.dist-info/WHEEL +4 -0
  159. tactus-0.31.0.dist-info/entry_points.txt +2 -0
  160. tactus-0.31.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,50 @@
1
--[[
tactus.tools.log: Logging tool for agents

Usage:
    local log = require("tactus.tools.log")

    -- In an agent's toolset
    agent = Agent {
        tools = {"log"},
        ...
    }

This tool allows agents to log messages during execution.
Note: For direct logging in procedures, use the Log global directly:
    Log.info("message")
    Log.debug("message", {key = value})
]]--

local log_tool = Tool {
    name = "log",
    description = "Log a message during procedure execution",
    input = {
        message = field.string{required = true, description = "Message to log"},
        level = field.string{required = false, description = "Log level: debug, info, warn, error"},
        data = field.object{required = false, description = "Optional data to include"}
    },
    function(args)
        -- Default to "info" when no level is supplied, and to an empty
        -- table when no structured data accompanies the message.
        local level = args.level or "info"
        local payload = args.data or {}

        -- Select the matching sink on the runtime-injected Log global;
        -- any unrecognized level falls back to Log.info.
        local sink
        if level == "debug" then
            sink = Log.debug
        elseif level == "warn" then
            sink = Log.warn
        elseif level == "error" then
            sink = Log.error
        else
            sink = Log.info
        end
        sink(args.message, payload)

        -- Echo back what was logged so the agent can confirm the call.
        return {
            logged = true,
            level = level,
            message = args.message
        }
    end
}

return log_tool
@@ -0,0 +1,273 @@
1
+ # Tactus BDD Testing Framework
2
+
3
+ First-class Gherkin-style BDD testing integrated into the Tactus DSL.
4
+
5
+ ## Overview
6
+
7
+ The Tactus BDD Testing Framework allows you to write behavior-driven tests directly in your procedure files using Gherkin syntax. Tests are executed using Behave under the hood, with full support for:
8
+
9
+ - **Natural language specifications** - Write tests in plain English using Gherkin
10
+ - **Built-in step library** - Comprehensive steps for Tactus primitives (tools, state, etc.)
11
+ - **Custom steps** - Define your own steps in Lua for advanced assertions
12
+ - **Parallel execution** - Run scenarios in parallel for fast feedback
13
+ - **Consistency evaluation** - Run tests multiple times to measure reliability
14
+ - **Structured results** - All results are Pydantic models, no text parsing
15
+
16
+ ## Quick Start
17
+
18
+ ### 1. Add Specifications to Your Procedure
19
+
20
+ ```lua
21
+ -- procedure.tac
22
+ name("my_procedure")
23
+ version("1.0.0")
24
+
25
+ agent("worker", {
26
+ provider = "openai",
27
+ model = "gpt-4o-mini",
28
+ system_prompt = "Do the work",
29
+ tools = {"search", "done"}
30
+ })
31
+
32
+ procedure(function()
33
+ repeat
34
+ Worker()
35
+ until done.called()
36
+ end)
37
+
38
+ -- Add BDD specifications
39
+ specifications([[
40
+ Feature: My Procedure
41
+
42
+ Scenario: Worker completes task
43
+ Given the procedure has started
44
+ When the worker agent takes turns
45
+ Then the search tool should be called
46
+ And the done tool should be called
47
+ And the procedure should complete successfully
48
+ ]])
49
+ ```
50
+
51
+ ### 2. Run Tests
52
+
53
+ ```bash
54
+ # Run all scenarios once
55
+ tactus test procedure.tac
56
+
57
+ # Run specific scenario
58
+ tactus test procedure.tac --scenario "Worker completes task"
59
+
60
+ # Run without parallel execution
61
+ tactus test procedure.tac --no-parallel
62
+ ```
63
+
64
+ ### 3. Evaluate Consistency
65
+
66
+ ```bash
67
+ # Run each scenario 10 times to measure consistency
68
+ tactus test procedure.tac --runs 10
69
+
70
+ # Run with more iterations
71
+ tactus test procedure.tac --runs 50
72
+
73
+ # Evaluate specific scenario
74
+ tactus test procedure.tac --scenario "Worker completes task" --runs 20
75
+ ```
76
+
77
+ ## Built-in Steps
78
+
79
+ The framework provides a comprehensive library of built-in steps:
80
+
81
+ ### Tool Steps
82
+
83
+ ```gherkin
84
+ Then the search tool should be called
85
+ Then the search tool should not be called
86
+ Then the search tool should be called at least 3 times
87
+ Then the search tool should be called exactly 2 times
88
+ Then the search tool should be called with query=test
89
+ ```
90
+
91
+ ### State Steps
92
+
93
+ ```gherkin
94
+ Then the state count should be 5
95
+ Then the state error should exist
96
+ Then the state should contain results
97
+ ```
98
+
99
+ ### Completion Steps
100
+
101
+ ```gherkin
102
+ Then the procedure should complete successfully
103
+ Then the procedure should fail
104
+ Then the stop reason should be done
105
+ Then the stop reason should contain timeout
106
+ ```
107
+
108
+ ### Iteration Steps
109
+
110
+ ```gherkin
111
+ Then the total iterations should be less than 10
112
+ Then the total iterations should be between 5 and 15
113
+ Then the agent should take at least 3 turns
114
+ ```
115
+
116
+ ### Parameter Steps
117
+
118
+ ```gherkin
119
+ Given the topic parameter is quantum computing
120
+ Then the agent's context should include quantum computing
121
+ ```
122
+
123
+ ### Agent Steps
124
+
125
+ ```gherkin
126
+ When the worker agent takes turns
127
+ When the procedure runs
128
+ ```
129
+
130
+ ## Custom Steps
131
+
132
+ Define custom steps in Lua for advanced assertions:
133
+
134
+ ```lua
135
+ -- Custom step definition
136
+ step("the research quality is high", function()
137
+ local results = State.get("research_results")
138
+ assert(#results > 5, "Should have at least 5 results")
139
+ assert(results[1].quality == "high", "First result should be high quality")
140
+ end)
141
+
142
+ -- Use in specifications
143
+ specifications([[
144
+ Feature: Research Quality
145
+
146
+ Scenario: High quality research
147
+ Given the procedure has started
148
+ When the procedure runs
149
+ Then the research quality is high
150
+ ]])
151
+ ```
152
+
153
+ ## Evaluation Metrics
154
+
155
+ The `evaluate` command runs scenarios multiple times and provides:
156
+
157
+ - **Success Rate** - Percentage of runs that passed
158
+ - **Mean Duration** - Average execution time
159
+ - **Standard Deviation** - Timing consistency
160
+ - **Consistency Score** - How often runs produce identical step outcomes (0.0 to 1.0)
161
+ - **Flakiness Detection** - Identifies scenarios with inconsistent results
162
+
163
+ Example output:
164
+
165
+ ```
166
+ Scenario: Agent completes research
167
+ Success Rate: 90% (9/10)
168
+ Duration: 1.23s (±0.15s)
169
+ Consistency: 90%
170
+ ⚠️ FLAKY - Inconsistent results detected
171
+ ```
172
+
173
+ ## Parser Warnings
174
+
175
+ The Tactus validator will warn if your procedure has no specifications:
176
+
177
+ ```bash
178
+ $ tactus validate procedure.tac
179
+
180
+ ⚠ Warning: No specifications defined - consider adding BDD tests using specifications([[...]])
181
+ ```
182
+
183
+ ## Note on Evaluations
184
+
185
+ This framework is for **testing logic** (BDD). If you want to evaluate **LLM output quality** using datasets and metrics (Pydantic Evals), see the main [README](../../README.md#evaluations-testing-agent-intelligence) and use the `tactus eval` command.
186
+
187
+ ## Architecture
188
+
189
+ ```
190
+ Tactus Procedure (.tac)
191
+ └─ specifications([[ Gherkin text ]])
192
+ └─ step("custom step", function() ... end)
193
+
194
+ Gherkin Parser (gherkin-official)
195
+
196
+ Feature/Scenario/Step AST
197
+
198
+ Step Matcher (built-in + custom steps)
199
+
200
+ Behave Integration Layer
201
+ ├─ Generate .feature files
202
+ ├─ Generate step_definitions.py
203
+ └─ Run via Behave Runner API
204
+
205
+ Parallel Execution (multiprocessing)
206
+
207
+ Structured Results (Pydantic models)
208
+
209
+ CLI Output / IDE Display / Log Events
210
+ ```
211
+
212
+ ## API Usage
213
+
214
+ You can also use the testing framework programmatically:
215
+
216
+ ```python
217
+ from pathlib import Path
218
+ from tactus.testing import TactusTestRunner, TactusEvaluationRunner
219
+
220
+ # Run tests
221
+ runner = TactusTestRunner(Path("procedure.tac"))
222
+ runner.setup(gherkin_text)
223
+ result = runner.run_tests(parallel=True)
224
+
225
+ print(f"Passed: {result.passed_scenarios}/{result.total_scenarios}")
226
+
227
+ # Run evaluation
228
+ evaluator = TactusEvaluationRunner(Path("procedure.tac"))
229
+ evaluator.setup(gherkin_text)
230
+ eval_results = evaluator.evaluate_all(runs=10, parallel=True)
231
+
232
+ for result in eval_results:
233
+ print(f"{result.scenario_name}: {result.success_rate:.1%} success rate")
234
+ ```
235
+
236
+ ## IDE Integration
237
+
238
+ Test and evaluation results are emitted as structured log events for IDE display:
239
+
240
+ - `TestStartedEvent`
241
+ - `TestCompletedEvent`
242
+ - `TestScenarioStartedEvent`
243
+ - `TestScenarioCompletedEvent`
244
+ - `EvaluationStartedEvent`
245
+ - `EvaluationCompletedEvent`
246
+ - `EvaluationScenarioStartedEvent`
247
+ - `EvaluationScenarioCompletedEvent`
248
+ - `EvaluationProgressEvent`
249
+
250
+ All events are Pydantic models that can be serialized to JSON for display in the IDE's execution panel.
251
+
252
+ ## Dependencies
253
+
254
+ The testing framework requires:
255
+
256
+ - `behave>=1.2.6` - BDD test execution
257
+ - `gherkin-official>=28.0.0` - Gherkin parsing
258
+
259
+ These are automatically installed with Tactus.
260
+
261
+ ## Examples
262
+
263
+ See `examples/with-bdd-tests.tac` for a complete example with:
264
+ - Multiple scenarios
265
+ - Custom steps
266
+ - Evaluation configuration
267
+ - All major step types
268
+
269
+
270
+
271
+
272
+
273
+
@@ -0,0 +1,61 @@
1
+ """
2
+ Tactus BDD Testing Framework.
3
+
4
+ Provides Gherkin-style BDD testing integrated into the Tactus DSL.
5
+ """
6
+
7
+ from .models import (
8
+ StepResult,
9
+ ScenarioResult,
10
+ FeatureResult,
11
+ TestResult,
12
+ EvaluationResult,
13
+ ParsedStep,
14
+ ParsedScenario,
15
+ ParsedFeature,
16
+ )
17
+ from .gherkin_parser import GherkinParser
18
+ from .test_runner import TactusTestRunner
19
+ from .evaluation_runner import TactusEvaluationRunner
20
+ from .context import TactusTestContext
21
+ from .mock_tools import MockToolRegistry, MockedToolPrimitive, create_default_mocks
22
+ from .mock_hitl import MockHITLHandler
23
+ from .events import (
24
+ TestStartedEvent,
25
+ TestCompletedEvent,
26
+ TestScenarioStartedEvent,
27
+ TestScenarioCompletedEvent,
28
+ EvaluationStartedEvent,
29
+ EvaluationCompletedEvent,
30
+ EvaluationScenarioStartedEvent,
31
+ EvaluationScenarioCompletedEvent,
32
+ EvaluationProgressEvent,
33
+ )
34
+
35
+ __all__ = [
36
+ "StepResult",
37
+ "ScenarioResult",
38
+ "FeatureResult",
39
+ "TestResult",
40
+ "EvaluationResult",
41
+ "ParsedStep",
42
+ "ParsedScenario",
43
+ "ParsedFeature",
44
+ "GherkinParser",
45
+ "TactusTestRunner",
46
+ "TactusEvaluationRunner",
47
+ "TactusTestContext",
48
+ "MockToolRegistry",
49
+ "MockedToolPrimitive",
50
+ "create_default_mocks",
51
+ "MockHITLHandler",
52
+ "TestStartedEvent",
53
+ "TestCompletedEvent",
54
+ "TestScenarioStartedEvent",
55
+ "TestScenarioCompletedEvent",
56
+ "EvaluationStartedEvent",
57
+ "EvaluationCompletedEvent",
58
+ "EvaluationScenarioStartedEvent",
59
+ "EvaluationScenarioCompletedEvent",
60
+ "EvaluationProgressEvent",
61
+ ]