tolokaforge 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tolokaforge-0.2.0/.cursor/rules/cursor.mdc +19 -0
- tolokaforge-0.2.0/.dockerignore +63 -0
- tolokaforge-0.2.0/.env.example +24 -0
- tolokaforge-0.2.0/.gitattributes +6 -0
- tolokaforge-0.2.0/.github/workflows/ci.yml +309 -0
- tolokaforge-0.2.0/.github/workflows/claude-review.yml +100 -0
- tolokaforge-0.2.0/.github/workflows/publish-adapter-terminal-bench.yml +132 -0
- tolokaforge-0.2.0/.github/workflows/publish-tolokaforge.yml +132 -0
- tolokaforge-0.2.0/.github/workflows/release-gate.yml +97 -0
- tolokaforge-0.2.0/.gitignore +93 -0
- tolokaforge-0.2.0/.mcp.json +9 -0
- tolokaforge-0.2.0/.pre-commit-config.yaml +22 -0
- tolokaforge-0.2.0/.python-version +1 -0
- tolokaforge-0.2.0/.roo/mcp.json +21 -0
- tolokaforge-0.2.0/.roomodes +34 -0
- tolokaforge-0.2.0/.vscode/settings.json +30 -0
- tolokaforge-0.2.0/.vscode/tasks.json +72 -0
- tolokaforge-0.2.0/AGENTS.md +461 -0
- tolokaforge-0.2.0/CHANGELOG.md +79 -0
- tolokaforge-0.2.0/CITATION.bib +8 -0
- tolokaforge-0.2.0/CITATION.cff +11 -0
- tolokaforge-0.2.0/CLAUDE.md +11 -0
- tolokaforge-0.2.0/CONTRIBUTING.md +35 -0
- tolokaforge-0.2.0/CONTRIBUTORS.md +12 -0
- tolokaforge-0.2.0/LICENSE +13 -0
- tolokaforge-0.2.0/Makefile +143 -0
- tolokaforge-0.2.0/PKG-INFO +230 -0
- tolokaforge-0.2.0/README.md +152 -0
- tolokaforge-0.2.0/docs/ADAPTERS.md +154 -0
- tolokaforge-0.2.0/docs/ADAPTER_ARCHITECTURE.md +220 -0
- tolokaforge-0.2.0/docs/ADAPTER_INTERFACE.md +83 -0
- tolokaforge-0.2.0/docs/ADD_NEW_MODEL.md +290 -0
- tolokaforge-0.2.0/docs/ANALYTICS.md +110 -0
- tolokaforge-0.2.0/docs/API.md +71 -0
- tolokaforge-0.2.0/docs/BACKEND_STATUS_MATRIX.md +29 -0
- tolokaforge-0.2.0/docs/BENCHMARK_BACKEND_DESIGNS.md +68 -0
- tolokaforge-0.2.0/docs/BENCHMARK_TYPES.md +41 -0
- tolokaforge-0.2.0/docs/BROWSER_TOOLS.md +156 -0
- tolokaforge-0.2.0/docs/CONFIG.md +316 -0
- tolokaforge-0.2.0/docs/CONVERSION_LAYER.md +214 -0
- tolokaforge-0.2.0/docs/DB_SERVICE_API.md +921 -0
- tolokaforge-0.2.0/docs/DEEP_RESEARCH.md +45 -0
- tolokaforge-0.2.0/docs/FINAL_AUDIT.md +434 -0
- tolokaforge-0.2.0/docs/FUTURE_DEVELOPMENT.md +617 -0
- tolokaforge-0.2.0/docs/GEMINI_QUIRKS.md +467 -0
- tolokaforge-0.2.0/docs/GETTING_STARTED.md +180 -0
- tolokaforge-0.2.0/docs/GOLDEN_TRIALS.md +16 -0
- tolokaforge-0.2.0/docs/GRADING.md +187 -0
- tolokaforge-0.2.0/docs/GRADING_VERIFICATION.md +367 -0
- tolokaforge-0.2.0/docs/GRPC_PROTOCOL.md +699 -0
- tolokaforge-0.2.0/docs/KNOWLEDGE_REASONING.md +42 -0
- tolokaforge-0.2.0/docs/LLM_LAYER.md +733 -0
- tolokaforge-0.2.0/docs/LOGGING.md +667 -0
- tolokaforge-0.2.0/docs/MCP_INTEGRATION.md +22 -0
- tolokaforge-0.2.0/docs/NATIVE_ADAPTER.md +401 -0
- tolokaforge-0.2.0/docs/NOVA_INTEGRATION.md +155 -0
- tolokaforge-0.2.0/docs/OUTPUT_FORMAT.md +433 -0
- tolokaforge-0.2.0/docs/PERFORMANCE.md +94 -0
- tolokaforge-0.2.0/docs/PYTHON_PACKAGE.md +110 -0
- tolokaforge-0.2.0/docs/REFERENCE.md +428 -0
- tolokaforge-0.2.0/docs/RUNNER.md +168 -0
- tolokaforge-0.2.0/docs/SECURITY.md +140 -0
- tolokaforge-0.2.0/docs/TASKS.md +197 -0
- tolokaforge-0.2.0/docs/TASK_DESCRIPTION_SCHEMA.md +698 -0
- tolokaforge-0.2.0/docs/TASK_PACKS.md +104 -0
- tolokaforge-0.2.0/docs/TOOLS.md +57 -0
- tolokaforge-0.2.0/docs/TROUBLESHOOTING.md +54 -0
- tolokaforge-0.2.0/docs/TYPESENSE_INTEGRATION.md +421 -0
- tolokaforge-0.2.0/docs/custom_checks.md +18 -0
- tolokaforge-0.2.0/examples/README.md +35 -0
- tolokaforge-0.2.0/examples/native/browser_task/README.md +57 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_01/fixtures/policy_brief.txt +9 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_01/grading.yaml +36 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_01/index.html +12 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_01/order_7712.html +10 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_01/policy_cancellation.html +8 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_01/policy_enterprise_addendum.html +7 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_01/policy_refunds.html +8 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_01/task.yaml +30 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_02/fixtures/runbook_notes.txt +5 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_02/grading.yaml +36 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_02/incident_443_ticket.html +8 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_02/index.html +10 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_02/runbook_base.html +7 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_02/runbook_emergency_addendum.html +7 -0
- tolokaforge-0.2.0/examples/native/browser_task/dataset/tasks/browser/browser_public_example_02/task.yaml +30 -0
- tolokaforge-0.2.0/examples/native/browser_task/run_config.yaml +24 -0
- tolokaforge-0.2.0/examples/native/coding/README.md +22 -0
- tolokaforge-0.2.0/examples/native/coding/dataset/tasks/coding/coding_public_example_01/fixtures/README.md +11 -0
- tolokaforge-0.2.0/examples/native/coding/dataset/tasks/coding/coding_public_example_01/fixtures/buggy_math.py +7 -0
- tolokaforge-0.2.0/examples/native/coding/dataset/tasks/coding/coding_public_example_01/grading.yaml +37 -0
- tolokaforge-0.2.0/examples/native/coding/dataset/tasks/coding/coding_public_example_01/task.yaml +27 -0
- tolokaforge-0.2.0/examples/native/coding/dataset/tasks/coding/coding_public_example_02/fixtures/README.md +17 -0
- tolokaforge-0.2.0/examples/native/coding/dataset/tasks/coding/coding_public_example_02/fixtures/data_parser.py +3 -0
- tolokaforge-0.2.0/examples/native/coding/dataset/tasks/coding/coding_public_example_02/grading.yaml +40 -0
- tolokaforge-0.2.0/examples/native/coding/dataset/tasks/coding/coding_public_example_02/task.yaml +27 -0
- tolokaforge-0.2.0/examples/native/coding/run_config.yaml +25 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/README.md +46 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/_shared/domain.yaml +18 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/_shared/mcp_server.py +18 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/_shared/models.py +17 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/_shared/system_prompt.md +20 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/_shared/tools/__init__.py +10 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/_shared/tools/notes.py +19 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/fixtures/tools.json +59 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/testcases/add_first_note/grading.yaml +22 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/testcases/add_first_note/initial_state.json +3 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/testcases/add_first_note/task.yaml +20 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/testcases/recall_existing_note/grading.yaml +18 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/testcases/recall_existing_note/initial_state.json +14 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/dataset/notes/testcases/recall_existing_note/task.yaml +21 -0
- tolokaforge-0.2.0/examples/native/native_shared_domain/run_config.yaml +25 -0
- tolokaforge-0.2.0/examples/native/tool_use/README.md +22 -0
- tolokaforge-0.2.0/examples/native/tool_use/dataset/tasks/tool_use/tool_use_public_example_01/fixtures/customer_ticket.json +8 -0
- tolokaforge-0.2.0/examples/native/tool_use/dataset/tasks/tool_use/tool_use_public_example_01/grading.yaml +42 -0
- tolokaforge-0.2.0/examples/native/tool_use/dataset/tasks/tool_use/tool_use_public_example_01/initial_state.json +13 -0
- tolokaforge-0.2.0/examples/native/tool_use/dataset/tasks/tool_use/tool_use_public_example_01/task.yaml +26 -0
- tolokaforge-0.2.0/examples/native/tool_use/dataset/tasks/tool_use/tool_use_public_example_02/fixtures/account_policy.md +5 -0
- tolokaforge-0.2.0/examples/native/tool_use/dataset/tasks/tool_use/tool_use_public_example_02/grading.yaml +41 -0
- tolokaforge-0.2.0/examples/native/tool_use/dataset/tasks/tool_use/tool_use_public_example_02/initial_state.json +11 -0
- tolokaforge-0.2.0/examples/native/tool_use/dataset/tasks/tool_use/tool_use_public_example_02/task.yaml +26 -0
- tolokaforge-0.2.0/examples/native/tool_use/run_config.yaml +25 -0
- tolokaforge-0.2.0/examples/terminal_bench/README.md +84 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/README.md +78 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/docker-compose.yaml +20 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/Dockerfile +50 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/docs/pipeline_design.md +87 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/entrypoint.sh +13 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/generate_data.py +249 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/init.sql +87 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/pipeline/cluster.py +46 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/pipeline/config.py +16 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/pipeline/extract.py +59 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/pipeline/features.py +72 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/pipeline/main.py +75 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/environment/pipeline/report.py +95 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/instruction.md +58 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/task.toml +17 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/task.yaml +73 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/tests/test.sh +53 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-airline-segmentation/tests/test_segmentation.py +304 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/README.md +75 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/docker-compose.yaml +20 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/Dockerfile +41 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/app/aggregator.py +119 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/app/config.py +9 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/app/database.py +15 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/app/fee_utils.py +91 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/app/legacy_fee_config.json +12 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/app/main.py +13 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/app/models.py +70 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/app/routers/__init__.py +0 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/app/routers/holds.py +146 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/app/routers/reports.py +28 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/entrypoint.sh +17 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/environment/init.sql +279 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/instruction.md +132 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/run-tests.sh +76 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/task.toml +17 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/task.yaml +121 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/tests/test.sh +76 -0
- tolokaforge-0.2.0/examples/terminal_bench/fix-billing-holds/tests/test_billing.py +557 -0
- tolokaforge-0.2.0/examples/terminal_bench/run_airline_segmentation.yaml +30 -0
- tolokaforge-0.2.0/examples/terminal_bench/run_billing_holds.yaml +30 -0
- tolokaforge-0.2.0/examples/terminal_bench/run_config.yaml +37 -0
- tolokaforge-0.2.0/pyproject.toml +315 -0
- tolokaforge-0.2.0/scripts/README.md +39 -0
- tolokaforge-0.2.0/scripts/common.sh +123 -0
- tolokaforge-0.2.0/scripts/generate_task_pack_compose_override.py +77 -0
- tolokaforge-0.2.0/scripts/setup/create_python_venv.sh +101 -0
- tolokaforge-0.2.0/scripts/setup/init_git_lfs.sh +32 -0
- tolokaforge-0.2.0/scripts/setup/setup_env.sh +171 -0
- tolokaforge-0.2.0/scripts/tests/smoke.sh +78 -0
- tolokaforge-0.2.0/scripts/tests/task_pack_docker_smoke.sh +124 -0
- tolokaforge-0.2.0/scripts/with_env.sh +44 -0
- tolokaforge-0.2.0/scripts/with_profile.sh +32 -0
- tolokaforge-0.2.0/tests/AGENTS.md +79 -0
- tolokaforge-0.2.0/tests/README.md +155 -0
- tolokaforge-0.2.0/tests/__init__.py +1 -0
- tolokaforge-0.2.0/tests/canonical/README.md +61 -0
- tolokaforge-0.2.0/tests/canonical/__init__.py +0 -0
- tolokaforge-0.2.0/tests/canonical/conftest.py +63 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/.gitkeep +0 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/golden_set_bug_reproduction/food_delivery_2_hash_comparison.json +6 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/golden_set_execution/food_delivery_2_golden_execution.json +4 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/grading_state_calc/fail_result.json +4 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/grading_state_calc/pass_result.json +4 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/grading_transcript_calc/fail_result.json +4 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/grading_transcript_calc/pass_result.json +4 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_browser_basic/grading_config.json +32 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_browser_basic/task_config.json +52 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_calc_basic/grading_config.json +31 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_calc_basic/task_config.json +51 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_example_domain_case_a/bundle_artifact_keys.json +10 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_example_domain_case_a/grading_config.json +24 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_example_domain_case_a/task_config.json +44 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_shop_orders_02/grading_config.json +135 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_shop_orders_02/initial_state_tables.json +46 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_shop_orders_02/task_config.json +53 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/native_shop_orders_02/tool_schemas.json +145 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/sanitizer_contract/anthropic.json +373 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/sanitizer_contract/aws_nova.json +373 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/sanitizer_contract/default.json +373 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/sanitizer_contract/openai_gpt5.json +379 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/sanitizer_contract/qwen.json +373 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/sanitizer_contract/xai_grok.json +379 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/schema_policy_dict_map_hints_tau/dict_map_hints.json +3 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/schema_policy_strict_tau/strict_transform.json +91 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/tau_conversion/task_config.json +28 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/tbench_echo_hello/grading_config.json +13 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/tbench_echo_hello/task_config.json +51 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/tbench_echo_hello/task_description.json +98 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/tbench_echo_hello/tool_schemas.json +44 -0
- tolokaforge-0.2.0/tests/canonical/snapshots/trajectory_reasoning/trajectory.yaml +36 -0
- tolokaforge-0.2.0/tests/canonical/test_cache_policy_preset_routing.py +73 -0
- tolokaforge-0.2.0/tests/canonical/test_capability_registry.py +194 -0
- tolokaforge-0.2.0/tests/canonical/test_content_policy_filler_routing.py +89 -0
- tolokaforge-0.2.0/tests/canonical/test_cost_extraction_canon.py +314 -0
- tolokaforge-0.2.0/tests/canonical/test_custom_checks_canon.py +324 -0
- tolokaforge-0.2.0/tests/canonical/test_grading_canon.py +135 -0
- tolokaforge-0.2.0/tests/canonical/test_grading_pipeline_canon.py +114 -0
- tolokaforge-0.2.0/tests/canonical/test_native_adapter_canon.py +360 -0
- tolokaforge-0.2.0/tests/canonical/test_output_layout.py +351 -0
- tolokaforge-0.2.0/tests/canonical/test_sanitizer_contract.py +365 -0
- tolokaforge-0.2.0/tests/canonical/test_schema_policies.py +126 -0
- tolokaforge-0.2.0/tests/canonical/test_shop_orders_02_behavior_canon.py +984 -0
- tolokaforge-0.2.0/tests/canonical/test_terminal_bench_adapter_canon.py +147 -0
- tolokaforge-0.2.0/tests/canonical/test_trajectory_reasoning_snapshot.py +156 -0
- tolokaforge-0.2.0/tests/conftest.py +111 -0
- tolokaforge-0.2.0/tests/data/__init__.py +0 -0
- tolokaforge-0.2.0/tests/data/configs/tau_retail_mini.yaml +33 -0
- tolokaforge-0.2.0/tests/data/projects/README.md +230 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/check_helpers.py +190 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/cities.json +62 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/combined_initial_state.json +11267 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/menu_item_categories.json +92 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/menu_items.json +2891 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/menu_items_per_cuisine.json +462 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/money_back_requests.json +1 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/orders.json +4467 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/restaurant_names_desc.json +462 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/restaurant_rates.json +284 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/restaurants.json +1698 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/data/users.json +846 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/mcp_server.py +131 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/output/trials/051fa6cb-a29e-4a0d-9ccf-e0f95802eee5/0/env.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/output/trials/051fa6cb-a29e-4a0d-9ccf-e0f95802eee5/0/grade.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/output/trials/051fa6cb-a29e-4a0d-9ccf-e0f95802eee5/0/logs.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/output/trials/051fa6cb-a29e-4a0d-9ccf-e0f95802eee5/0/metrics.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/output/trials/051fa6cb-a29e-4a0d-9ccf-e0f95802eee5/0/task.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/output/trials/051fa6cb-a29e-4a0d-9ccf-e0f95802eee5/0/trajectory.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tasks/order_modify_with_checks/checks.py +255 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tasks/order_modify_with_checks/grading.yaml +38 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tasks/order_modify_with_checks/task.yaml +84 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tasks/order_six_items_golden/grading.yaml +52 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tasks/order_six_items_golden/task.yaml +88 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/__init__.py +4 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/add_payment_method.py +106 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/add_restaurant_rating.py +99 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/calculate.py +36 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/cancel_order.py +76 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/change_primary_payment_method.py +81 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/create_money_back_request.py +103 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/create_order.py +274 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/data/__init__.py +36 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/data/constants.py +81 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/data/schemas.py +201 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/delete_money_back_request.py +69 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/delete_payment_method.py +84 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/delete_restaurant_rating.py +92 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/get_order_details.py +47 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/get_restaurant_details.py +87 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/get_restaurant_rating.py +86 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/get_restaurants_list.py +100 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/get_user_details.py +35 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/get_user_money_back_requests.py +100 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/get_user_payments_history.py +103 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/lookup_for_city_id.py +54 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/modify_order.py +219 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/think.py +31 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/tool_base.py +7 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/tools_helpers.py +40 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/transfer_to_human_agents.py +36 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/update_user_address.py +80 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tau_tools/update_user_details.py +111 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/tools_helpers.py +40 -0
- tolokaforge-0.2.0/tests/data/projects/food_delivery_2/wiki.md +153 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/data/__init__.py +66 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/env.py +5 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/aggregate.json +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/per_task_metrics.json +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/run_state.json +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/0f3b1ff7/0/env.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/0f3b1ff7/0/grade.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/0f3b1ff7/0/logs.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/0f3b1ff7/0/metrics.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/0f3b1ff7/0/task.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/0f3b1ff7/0/trajectory.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_001/0/env.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_001/0/grade.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_001/0/logs.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_001/0/metrics.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_001/0/task.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_001/0/trajectory.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_002/0/env.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_002/0/grade.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_002/0/logs.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_002/0/metrics.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_002/0/task.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/output/trials/test_002/0/trajectory.yaml +3 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/tasks_test.py +60 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/tools/__init__.py +115 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/types_local.py +26 -0
- tolokaforge-0.2.0/tests/data/projects/tau_retail_mini/wiki.md +17 -0
- tolokaforge-0.2.0/tests/data/tasks/bad_mobile/grading.yaml +2 -0
- tolokaforge-0.2.0/tests/data/tasks/bad_mobile/task.yaml +16 -0
- tolokaforge-0.2.0/tests/data/tasks/browser_basic/grading.yaml +29 -0
- tolokaforge-0.2.0/tests/data/tasks/browser_basic/task.yaml +44 -0
- tolokaforge-0.2.0/tests/data/tasks/calc_basic/grading.yaml +28 -0
- tolokaforge-0.2.0/tests/data/tasks/calc_basic/task.yaml +29 -0
- tolokaforge-0.2.0/tests/data/tasks/calc_custom_checks/checks.py +84 -0
- tolokaforge-0.2.0/tests/data/tasks/calc_custom_checks/grading.yaml +26 -0
- tolokaforge-0.2.0/tests/data/tasks/calc_custom_checks/task.yaml +30 -0
- tolokaforge-0.2.0/tests/data/tasks/example_domain/_shared/domain.yaml +18 -0
- tolokaforge-0.2.0/tests/data/tasks/example_domain/_shared/mcp_server.py +7 -0
- tolokaforge-0.2.0/tests/data/tasks/example_domain/_shared/system_prompt.md +4 -0
- tolokaforge-0.2.0/tests/data/tasks/example_domain/testcases/case_a/grading.yaml +11 -0
- tolokaforge-0.2.0/tests/data/tasks/example_domain/testcases/case_a/initial_state.json +3 -0
- tolokaforge-0.2.0/tests/data/tasks/example_domain/testcases/case_a/task.yaml +11 -0
- tolokaforge-0.2.0/tests/data/tasks/shop_orders_02/fixtures/tools.json +101 -0
- tolokaforge-0.2.0/tests/data/tasks/shop_orders_02/grading.yaml +127 -0
- tolokaforge-0.2.0/tests/data/tasks/shop_orders_02/initial_state.json +41 -0
- tolokaforge-0.2.0/tests/data/tasks/shop_orders_02/mcp_server.py +180 -0
- tolokaforge-0.2.0/tests/data/tasks/shop_orders_02/system_prompt.md +44 -0
- tolokaforge-0.2.0/tests/data/tasks/shop_orders_02/task.yaml +46 -0
- tolokaforge-0.2.0/tests/data/tasks/synth_mobile_01/grading.yaml +2 -0
- tolokaforge-0.2.0/tests/data/tasks/synth_mobile_01/initial_state.json +3 -0
- tolokaforge-0.2.0/tests/data/tasks/synth_mobile_01/task.yaml +23 -0
- tolokaforge-0.2.0/tests/data/terminal_bench_tasks/echo-hello/docker-compose.yaml +12 -0
- tolokaforge-0.2.0/tests/data/terminal_bench_tasks/echo-hello/environment/Dockerfile +3 -0
- tolokaforge-0.2.0/tests/data/terminal_bench_tasks/echo-hello/run-tests.sh +24 -0
- tolokaforge-0.2.0/tests/data/terminal_bench_tasks/echo-hello/task.toml +16 -0
- tolokaforge-0.2.0/tests/data/terminal_bench_tasks/echo-hello/task.yaml +2 -0
- tolokaforge-0.2.0/tests/data/terminal_bench_tasks/echo-hello/tests/test_outputs.py +11 -0
- tolokaforge-0.2.0/tests/integration/__init__.py +1 -0
- tolokaforge-0.2.0/tests/integration/docker/__init__.py +8 -0
- tolokaforge-0.2.0/tests/integration/docker/conftest.py +74 -0
- tolokaforge-0.2.0/tests/integration/docker/test_docker_build_context.py +103 -0
- tolokaforge-0.2.0/tests/integration/docker/test_docker_caching.py +254 -0
- tolokaforge-0.2.0/tests/integration/docker/test_docker_integration.py +547 -0
- tolokaforge-0.2.0/tests/integration/docker/test_docker_stack.py +278 -0
- tolokaforge-0.2.0/tests/integration/llm/__init__.py +14 -0
- tolokaforge-0.2.0/tests/integration/llm/_capability.py +388 -0
- tolokaforge-0.2.0/tests/integration/llm/conftest.py +122 -0
- tolokaforge-0.2.0/tests/integration/llm/registry.py +893 -0
- tolokaforge-0.2.0/tests/integration/llm/test_basic_completion.py +42 -0
- tolokaforge-0.2.0/tests/integration/llm/test_cost_populated.py +73 -0
- tolokaforge-0.2.0/tests/integration/llm/test_decimal_field_tool_call.py +108 -0
- tolokaforge-0.2.0/tests/integration/llm/test_dict_map_tool_call.py +123 -0
- tolokaforge-0.2.0/tests/integration/llm/test_discriminated_union_tool_call.py +327 -0
- tolokaforge-0.2.0/tests/integration/llm/test_enum_slash_tolerance.py +132 -0
- tolokaforge-0.2.0/tests/integration/llm/test_enum_slash_tolerance_unsupported_ratchet.py +127 -0
- tolokaforge-0.2.0/tests/integration/llm/test_gemini_placeholder_signature_replay.py +362 -0
- tolokaforge-0.2.0/tests/integration/llm/test_implicit_prompt_caching.py +168 -0
- tolokaforge-0.2.0/tests/integration/llm/test_implicit_prompt_caching_unsupported_ratchet.py +184 -0
- tolokaforge-0.2.0/tests/integration/llm/test_lexical_tool_invention.py +145 -0
- tolokaforge-0.2.0/tests/integration/llm/test_multi_turn_error_recovery.py +232 -0
- tolokaforge-0.2.0/tests/integration/llm/test_multi_turn_tool_use.py +101 -0
- tolokaforge-0.2.0/tests/integration/llm/test_nova_api.py +549 -0
- tolokaforge-0.2.0/tests/integration/llm/test_progress_after_success.py +268 -0
- tolokaforge-0.2.0/tests/integration/llm/test_prompt_caching.py +155 -0
- tolokaforge-0.2.0/tests/integration/llm/test_re2_pattern_tolerance.py +142 -0
- tolokaforge-0.2.0/tests/integration/llm/test_re2_pattern_tolerance_unsupported_ratchet.py +134 -0
- tolokaforge-0.2.0/tests/integration/llm/test_required_fields_complete.py +154 -0
- tolokaforge-0.2.0/tests/integration/llm/test_simple_tool_call.py +100 -0
- tolokaforge-0.2.0/tests/integration/llm/test_thinking_emits_blocks.py +88 -0
- tolokaforge-0.2.0/tests/integration/llm/test_thinking_replay_roundtrip.py +185 -0
- tolokaforge-0.2.0/tests/integration/llm/test_tool_name_discipline.py +133 -0
- tolokaforge-0.2.0/tests/integration/llm/test_unsigned_thinking_replay.py +179 -0
- tolokaforge-0.2.0/tests/integration/llm/test_usage_metrics_populated.py +51 -0
- tolokaforge-0.2.0/tests/integration/test_browser_tool.py +109 -0
- tolokaforge-0.2.0/tests/integration/test_docker_grading.py +39 -0
- tolokaforge-0.2.0/tests/integration/test_docker_services.py +182 -0
- tolokaforge-0.2.0/tests/integration/test_run_queue_postgres.py +78 -0
- tolokaforge-0.2.0/tests/integration/test_runner_cleanup_trial_grpc.py +109 -0
- tolokaforge-0.2.0/tests/integration/test_security.py +84 -0
- tolokaforge-0.2.0/tests/integration/test_typesense_lifecycle.py +50 -0
- tolokaforge-0.2.0/tests/unit/__init__.py +1 -0
- tolokaforge-0.2.0/tests/unit/adapters/__init__.py +1 -0
- tolokaforge-0.2.0/tests/unit/conftest.py +51 -0
- tolokaforge-0.2.0/tests/unit/grading/__init__.py +1 -0
- tolokaforge-0.2.0/tests/unit/grading/test_custom_checks.py +721 -0
- tolokaforge-0.2.0/tests/unit/grading/test_custom_checks_runner.py +281 -0
- tolokaforge-0.2.0/tests/unit/grading/test_evaluators.py +333 -0
- tolokaforge-0.2.0/tests/unit/grading/test_fuzzy_compare.py +200 -0
- tolokaforge-0.2.0/tests/unit/grading/test_grading_correctness.py +639 -0
- tolokaforge-0.2.0/tests/unit/grading/test_hash.py +144 -0
- tolokaforge-0.2.0/tests/unit/grading/test_judge.py +802 -0
- tolokaforge-0.2.0/tests/unit/grading/test_llm_judge_runner.py +241 -0
- tolokaforge-0.2.0/tests/unit/grading/test_state_checks.py +319 -0
- tolokaforge-0.2.0/tests/unit/grading/test_transcript.py +221 -0
- tolokaforge-0.2.0/tests/unit/llm/__init__.py +0 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/__init__.py +4 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/anthropic_display_omitted_response.json +19 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/anthropic_thinking_response.json +27 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/anthropic_usage_with_cache.json +15 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/minimal_usage.json +7 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/openai_gpt5_reasoning_response.json +17 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/openai_gpt5_usage_with_reasoning.json +13 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/openrouter_anthropic_reasoning_response.json +22 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/openrouter_anthropic_usage.json +17 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/openrouter_anthropic_usage_real.json +27 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/openrouter_gemini_reasoning_encrypted_response.json +17 -0
- tolokaforge-0.2.0/tests/unit/llm/fixtures/openrouter_gemini_reasoning_text_response.json +21 -0
- tolokaforge-0.2.0/tests/unit/llm/test_anthropic_claude_47_preset.py +63 -0
- tolokaforge-0.2.0/tests/unit/llm/test_cache_policy_anthropic.py +303 -0
- tolokaforge-0.2.0/tests/unit/llm/test_capability_certificate.py +127 -0
- tolokaforge-0.2.0/tests/unit/llm/test_detect_dict_maps.py +181 -0
- tolokaforge-0.2.0/tests/unit/llm/test_json_coerce_response.py +247 -0
- tolokaforge-0.2.0/tests/unit/llm/test_message_replay.py +426 -0
- tolokaforge-0.2.0/tests/unit/llm/test_model_config_reasoning_rejects_string.py +68 -0
- tolokaforge-0.2.0/tests/unit/llm/test_openrouter_dict_stringify_recovery_preset.py +82 -0
- tolokaforge-0.2.0/tests/unit/llm/test_params_policy_budget.py +310 -0
- tolokaforge-0.2.0/tests/unit/llm/test_params_policy_unsupported_effort.py +176 -0
- tolokaforge-0.2.0/tests/unit/llm/test_preset_fingerprint.py +187 -0
- tolokaforge-0.2.0/tests/unit/llm/test_preset_overrides.py +69 -0
- tolokaforge-0.2.0/tests/unit/llm/test_qwen_dict_map_hints.py +225 -0
- tolokaforge-0.2.0/tests/unit/llm/test_qwen_preset.py +94 -0
- tolokaforge-0.2.0/tests/unit/llm/test_reasoning_codec.py +39 -0
- tolokaforge-0.2.0/tests/unit/llm/test_reasoning_codec_anthropic.py +469 -0
- tolokaforge-0.2.0/tests/unit/llm/test_reasoning_codec_gemini.py +609 -0
- tolokaforge-0.2.0/tests/unit/llm/test_reasoning_codec_openai.py +95 -0
- tolokaforge-0.2.0/tests/unit/llm/test_reasoning_dataclasses.py +124 -0
- tolokaforge-0.2.0/tests/unit/llm/test_response_policy_empty_container_coercion.py +157 -0
- tolokaforge-0.2.0/tests/unit/llm/test_response_policy_param_types_wiring.py +245 -0
- tolokaforge-0.2.0/tests/unit/llm/test_schema_sanitizer.py +104 -0
- tolokaforge-0.2.0/tests/unit/llm/test_schema_sanitizer_position_aware.py +415 -0
- tolokaforge-0.2.0/tests/unit/llm/test_schema_sanitizer_strict.py +325 -0
- tolokaforge-0.2.0/tests/unit/llm/test_synthetic_envelope_detection.py +358 -0
- tolokaforge-0.2.0/tests/unit/llm/test_usage.py +335 -0
- tolokaforge-0.2.0/tests/unit/llm/test_usage_pipeline_e2e.py +156 -0
- tolokaforge-0.2.0/tests/unit/secrets/__init__.py +0 -0
- tolokaforge-0.2.0/tests/unit/secrets/test_known_values.py +91 -0
- tolokaforge-0.2.0/tests/unit/secrets/test_log_filter.py +436 -0
- tolokaforge-0.2.0/tests/unit/secrets/test_no_raw_secret_access.py +103 -0
- tolokaforge-0.2.0/tests/unit/secrets/test_singleton.py +88 -0
- tolokaforge-0.2.0/tests/unit/test_adapters.py +225 -0
- tolokaforge-0.2.0/tests/unit/test_assemble_result_per_call_record.py +202 -0
- tolokaforge-0.2.0/tests/unit/test_browser_tool.py +159 -0
- tolokaforge-0.2.0/tests/unit/test_builtin_generic_wrapper.py +117 -0
- tolokaforge-0.2.0/tests/unit/test_builtin_registry.py +91 -0
- tolokaforge-0.2.0/tests/unit/test_builtin_tool_url_env_fallback.py +90 -0
- tolokaforge-0.2.0/tests/unit/test_builtin_tool_wrapper.py +212 -0
- tolokaforge-0.2.0/tests/unit/test_calculator_tool.py +165 -0
- tolokaforge-0.2.0/tests/unit/test_cli_commands.py +517 -0
- tolokaforge-0.2.0/tests/unit/test_cli_status.py +98 -0
- tolokaforge-0.2.0/tests/unit/test_config_validator.py +271 -0
- tolokaforge-0.2.0/tests/unit/test_conftest_docker_extra.py +32 -0
- tolokaforge-0.2.0/tests/unit/test_core_stack_playwright.py +40 -0
- tolokaforge-0.2.0/tests/unit/test_diff.py +147 -0
- tolokaforge-0.2.0/tests/unit/test_docker_adapter_cleanup_trial.py +37 -0
- tolokaforge-0.2.0/tests/unit/test_docker_build_context.py +280 -0
- tolokaforge-0.2.0/tests/unit/test_docker_runtime_grade_sentinel.py +31 -0
- tolokaforge-0.2.0/tests/unit/test_dockerfile_paths.py +73 -0
- tolokaforge-0.2.0/tests/unit/test_env_state_url_defaults.py +47 -0
- tolokaforge-0.2.0/tests/unit/test_executor_service_registry.py +135 -0
- tolokaforge-0.2.0/tests/unit/test_failure_attribution.py +138 -0
- tolokaforge-0.2.0/tests/unit/test_file_and_bash_tools.py +39 -0
- tolokaforge-0.2.0/tests/unit/test_full_stack_kwargs.py +92 -0
- tolokaforge-0.2.0/tests/unit/test_gemini_preset_routing.py +62 -0
- tolokaforge-0.2.0/tests/unit/test_golden_replay.py +176 -0
- tolokaforge-0.2.0/tests/unit/test_llm_providers.py +373 -0
- tolokaforge-0.2.0/tests/unit/test_logging.py +231 -0
- tolokaforge-0.2.0/tests/unit/test_metrics.py +157 -0
- tolokaforge-0.2.0/tests/unit/test_metrics_usage_accumulation.py +112 -0
- tolokaforge-0.2.0/tests/unit/test_mobile_tool.py +46 -0
- tolokaforge-0.2.0/tests/unit/test_mock_web_task_roots.py +88 -0
- tolokaforge-0.2.0/tests/unit/test_model_client.py +1854 -0
- tolokaforge-0.2.0/tests/unit/test_mounts.py +52 -0
- tolokaforge-0.2.0/tests/unit/test_native_adapter_builtin_dispatch.py +78 -0
- tolokaforge-0.2.0/tests/unit/test_native_adapter_domain.py +213 -0
- tolokaforge-0.2.0/tests/unit/test_network_409_race.py +61 -0
- tolokaforge-0.2.0/tests/unit/test_nova_logic.py +37 -0
- tolokaforge-0.2.0/tests/unit/test_orchestrator_full_stack_detection.py +77 -0
- tolokaforge-0.2.0/tests/unit/test_orchestrator_logic.py +975 -0
- tolokaforge-0.2.0/tests/unit/test_orchestrator_playwright_detection.py +57 -0
- tolokaforge-0.2.0/tests/unit/test_output_artifacts.py +300 -0
- tolokaforge-0.2.0/tests/unit/test_output_writer.py +264 -0
- tolokaforge-0.2.0/tests/unit/test_performance.py +76 -0
- tolokaforge-0.2.0/tests/unit/test_pricing.py +392 -0
- tolokaforge-0.2.0/tests/unit/test_rate_limiter.py +102 -0
- tolokaforge-0.2.0/tests/unit/test_resume.py +268 -0
- tolokaforge-0.2.0/tests/unit/test_run_queue.py +85 -0
- tolokaforge-0.2.0/tests/unit/test_runner_bootstrap_secrets.py +105 -0
- tolokaforge-0.2.0/tests/unit/test_runner_builtin_dispatch.py +113 -0
- tolokaforge-0.2.0/tests/unit/test_runner_cleanup_trial.py +109 -0
- tolokaforge-0.2.0/tests/unit/test_runner_filesystem_provisioning.py +97 -0
- tolokaforge-0.2.0/tests/unit/test_runner_jsonpath_grading.py +163 -0
- tolokaforge-0.2.0/tests/unit/test_runner_logic.py +630 -0
- tolokaforge-0.2.0/tests/unit/test_runner_per_trial_cost_accounting.py +197 -0
- tolokaforge-0.2.0/tests/unit/test_runner_pipeline.py +389 -0
- tolokaforge-0.2.0/tests/unit/test_service_definition.py +117 -0
- tolokaforge-0.2.0/tests/unit/test_stack_network_aliases.py +55 -0
- tolokaforge-0.2.0/tests/unit/test_stuck_detector.py +219 -0
- tolokaforge-0.2.0/tests/unit/test_task_loader.py +441 -0
- tolokaforge-0.2.0/tests/unit/test_task_packs.py +157 -0
- tolokaforge-0.2.0/tests/unit/test_terminal_bench.py +654 -0
- tolokaforge-0.2.0/tests/unit/test_tool_builtins.py +667 -0
- tolokaforge-0.2.0/tests/unit/test_tool_schema_tool_config.py +52 -0
- tolokaforge-0.2.0/tests/unit/test_tool_security.py +135 -0
- tolokaforge-0.2.0/tests/unit/test_tools_interface.py +532 -0
- tolokaforge-0.2.0/tests/unit/test_tools_registry.py +170 -0
- tolokaforge-0.2.0/tests/unit/test_trajectory_stage7_fields.py +83 -0
- tolokaforge-0.2.0/tests/unit/test_usage_calls_aggregation.py +123 -0
- tolokaforge-0.2.0/tests/unit/test_user_simulator_prompt_capture.py +77 -0
- tolokaforge-0.2.0/tests/unit/test_user_simulator_tools.py +48 -0
- tolokaforge-0.2.0/tests/unit/test_user_tools.py +138 -0
- tolokaforge-0.2.0/tests/unit/test_wheel_resolver.py +545 -0
- tolokaforge-0.2.0/tests/unit/test_workspace_editing_tools.py +179 -0
- tolokaforge-0.2.0/tests/utils/__init__.py +1 -0
- tolokaforge-0.2.0/tests/utils/containers.py +220 -0
- tolokaforge-0.2.0/tests/utils/docker_helpers.py +75 -0
- tolokaforge-0.2.0/tests/utils/fixtures.py +218 -0
- tolokaforge-0.2.0/tests/utils/mock_clients.py +79 -0
- tolokaforge-0.2.0/tests/utils/networks.py +86 -0
- tolokaforge-0.2.0/tests/utils/project_fixtures.py +408 -0
- tolokaforge-0.2.0/tests/utils/validators.py +243 -0
- tolokaforge-0.2.0/tolokaforge/__init__.py +54 -0
- tolokaforge-0.2.0/tolokaforge/adapters/__init__.py +136 -0
- tolokaforge-0.2.0/tolokaforge/adapters/_task_loader.py +270 -0
- tolokaforge-0.2.0/tolokaforge/adapters/base.py +495 -0
- tolokaforge-0.2.0/tolokaforge/adapters/native.py +939 -0
- tolokaforge-0.2.0/tolokaforge/agent/__init__.py +12 -0
- tolokaforge-0.2.0/tolokaforge/agent/__main__.py +6 -0
- tolokaforge-0.2.0/tolokaforge/agent/agent.proto +84 -0
- tolokaforge-0.2.0/tolokaforge/agent/agent_pb2.py +52 -0
- tolokaforge-0.2.0/tolokaforge/agent/agent_pb2_grpc.py +150 -0
- tolokaforge-0.2.0/tolokaforge/agent/service.py +193 -0
- tolokaforge-0.2.0/tolokaforge/cli/__init__.py +1 -0
- tolokaforge-0.2.0/tolokaforge/cli/adapter_commands.py +160 -0
- tolokaforge-0.2.0/tolokaforge/cli/config_commands.py +122 -0
- tolokaforge-0.2.0/tolokaforge/cli/docker_commands.py +186 -0
- tolokaforge-0.2.0/tolokaforge/cli/main.py +472 -0
- tolokaforge-0.2.0/tolokaforge/core/__init__.py +1 -0
- tolokaforge-0.2.0/tolokaforge/core/config_validator.py +361 -0
- tolokaforge-0.2.0/tolokaforge/core/data/model_presets.yaml +176 -0
- tolokaforge-0.2.0/tolokaforge/core/data/pricing.json +1841 -0
- tolokaforge-0.2.0/tolokaforge/core/docker_adapter.py +220 -0
- tolokaforge-0.2.0/tolokaforge/core/docker_runtime.py +597 -0
- tolokaforge-0.2.0/tolokaforge/core/env_state.py +268 -0
- tolokaforge-0.2.0/tolokaforge/core/evaluators/__init__.py +7 -0
- tolokaforge-0.2.0/tolokaforge/core/evaluators/action_evaluator.py +167 -0
- tolokaforge-0.2.0/tolokaforge/core/evaluators/communicate_evaluator.py +145 -0
- tolokaforge-0.2.0/tolokaforge/core/evaluators/environment_evaluator.py +238 -0
- tolokaforge-0.2.0/tolokaforge/core/failure_attribution.py +173 -0
- tolokaforge-0.2.0/tolokaforge/core/grading/__init__.py +87 -0
- tolokaforge-0.2.0/tolokaforge/core/grading/check_runner.py +626 -0
- tolokaforge-0.2.0/tolokaforge/core/grading/checks_helpers.py +471 -0
- tolokaforge-0.2.0/tolokaforge/core/grading/checks_interface.py +514 -0
- tolokaforge-0.2.0/tolokaforge/core/grading/combine.py +459 -0
- tolokaforge-0.2.0/tolokaforge/core/grading/fuzzy_compare.py +371 -0
- tolokaforge-0.2.0/tolokaforge/core/grading/judge.py +1215 -0
- tolokaforge-0.2.0/tolokaforge/core/grading/state_checks.py +468 -0
- tolokaforge-0.2.0/tolokaforge/core/grading/transcript.py +131 -0
- tolokaforge-0.2.0/tolokaforge/core/hash.py +183 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/__init__.py +131 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/_dict_maps.py +140 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/cache_policy.py +127 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/capabilities.py +71 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/client.py +1563 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/content_policy.py +117 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/params_policy.py +263 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/presets.py +431 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/prompt_policy.py +99 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/reasoning.py +134 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/reasoning_codec.py +467 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/response_policy.py +297 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/schema_sanitizer.py +816 -0
- tolokaforge-0.2.0/tolokaforge/core/llm/usage.py +277 -0
- tolokaforge-0.2.0/tolokaforge/core/logging.py +224 -0
- tolokaforge-0.2.0/tolokaforge/core/metrics.py +320 -0
- tolokaforge-0.2.0/tolokaforge/core/models.py +562 -0
- tolokaforge-0.2.0/tolokaforge/core/mounts.py +73 -0
- tolokaforge-0.2.0/tolokaforge/core/orchestrator.py +1907 -0
- tolokaforge-0.2.0/tolokaforge/core/output/__init__.py +32 -0
- tolokaforge-0.2.0/tolokaforge/core/output/artifacts.py +313 -0
- tolokaforge-0.2.0/tolokaforge/core/output_writer.py +215 -0
- tolokaforge-0.2.0/tolokaforge/core/pricing.py +296 -0
- tolokaforge-0.2.0/tolokaforge/core/rate_limiter.py +34 -0
- tolokaforge-0.2.0/tolokaforge/core/resume.py +255 -0
- tolokaforge-0.2.0/tolokaforge/core/run_queue.py +614 -0
- tolokaforge-0.2.0/tolokaforge/core/runner.py +587 -0
- tolokaforge-0.2.0/tolokaforge/core/schema/grade.json +34 -0
- tolokaforge-0.2.0/tolokaforge/core/schema/metrics.json +28 -0
- tolokaforge-0.2.0/tolokaforge/core/schema/tool_call.json +15 -0
- tolokaforge-0.2.0/tolokaforge/core/schema/trajectory.json +79 -0
- tolokaforge-0.2.0/tolokaforge/core/search/__init__.py +18 -0
- tolokaforge-0.2.0/tolokaforge/core/search/domain_state.py +251 -0
- tolokaforge-0.2.0/tolokaforge/core/search/typesense.py +286 -0
- tolokaforge-0.2.0/tolokaforge/core/search/typesense_server.py +384 -0
- tolokaforge-0.2.0/tolokaforge/core/stuck.py +115 -0
- tolokaforge-0.2.0/tolokaforge/core/tools_interface.py +388 -0
- tolokaforge-0.2.0/tolokaforge/core/utils/__init__.py +1 -0
- tolokaforge-0.2.0/tolokaforge/core/utils/diff.py +125 -0
- tolokaforge-0.2.0/tolokaforge/docker/__init__.py +165 -0
- tolokaforge-0.2.0/tolokaforge/docker/builder.py +371 -0
- tolokaforge-0.2.0/tolokaforge/docker/config.py +197 -0
- tolokaforge-0.2.0/tolokaforge/docker/container.py +1083 -0
- tolokaforge-0.2.0/tolokaforge/docker/dockerfiles/__init__.py +40 -0
- tolokaforge-0.2.0/tolokaforge/docker/dockerfiles/agent.Dockerfile +18 -0
- tolokaforge-0.2.0/tolokaforge/docker/dockerfiles/db_service.Dockerfile +38 -0
- tolokaforge-0.2.0/tolokaforge/docker/dockerfiles/executor.Dockerfile +25 -0
- tolokaforge-0.2.0/tolokaforge/docker/dockerfiles/json_db.Dockerfile +17 -0
- tolokaforge-0.2.0/tolokaforge/docker/dockerfiles/mock_web.Dockerfile +17 -0
- tolokaforge-0.2.0/tolokaforge/docker/dockerfiles/orchestrator.Dockerfile +22 -0
- tolokaforge-0.2.0/tolokaforge/docker/dockerfiles/rag.Dockerfile +35 -0
- tolokaforge-0.2.0/tolokaforge/docker/dockerfiles/runner.Dockerfile +87 -0
- tolokaforge-0.2.0/tolokaforge/docker/health.py +812 -0
- tolokaforge-0.2.0/tolokaforge/docker/image.py +644 -0
- tolokaforge-0.2.0/tolokaforge/docker/logging.py +750 -0
- tolokaforge-0.2.0/tolokaforge/docker/mount.py +376 -0
- tolokaforge-0.2.0/tolokaforge/docker/network.py +546 -0
- tolokaforge-0.2.0/tolokaforge/docker/policy.py +296 -0
- tolokaforge-0.2.0/tolokaforge/docker/ports.py +240 -0
- tolokaforge-0.2.0/tolokaforge/docker/registry.py +347 -0
- tolokaforge-0.2.0/tolokaforge/docker/stack.py +932 -0
- tolokaforge-0.2.0/tolokaforge/docker/stacks/__init__.py +30 -0
- tolokaforge-0.2.0/tolokaforge/docker/stacks/core.py +196 -0
- tolokaforge-0.2.0/tolokaforge/docker/stacks/full.py +135 -0
- tolokaforge-0.2.0/tolokaforge/docker/stacks/test.py +70 -0
- tolokaforge-0.2.0/tolokaforge/docker/stacks/typesense.py +83 -0
- tolokaforge-0.2.0/tolokaforge/docker/wait_for_services.py +251 -0
- tolokaforge-0.2.0/tolokaforge/docker/wheel_resolver.py +656 -0
- tolokaforge-0.2.0/tolokaforge/env/__init__.py +1 -0
- tolokaforge-0.2.0/tolokaforge/env/json_db_service/app.py +1195 -0
- tolokaforge-0.2.0/tolokaforge/env/json_db_service/requirements.txt +4 -0
- tolokaforge-0.2.0/tolokaforge/env/mock_web_service/app.py +470 -0
- tolokaforge-0.2.0/tolokaforge/env/mock_web_service/requirements.txt +5 -0
- tolokaforge-0.2.0/tolokaforge/env/rag_service/app.py +527 -0
- tolokaforge-0.2.0/tolokaforge/env/rag_service/requirements.txt +12 -0
- tolokaforge-0.2.0/tolokaforge/executor/__init__.py +12 -0
- tolokaforge-0.2.0/tolokaforge/executor/__main__.py +6 -0
- tolokaforge-0.2.0/tolokaforge/executor/executor.proto +78 -0
- tolokaforge-0.2.0/tolokaforge/executor/executor_pb2.py +50 -0
- tolokaforge-0.2.0/tolokaforge/executor/executor_pb2_grpc.py +197 -0
- tolokaforge-0.2.0/tolokaforge/executor/service.py +288 -0
- tolokaforge-0.2.0/tolokaforge/runner/__init__.py +75 -0
- tolokaforge-0.2.0/tolokaforge/runner/__main__.py +335 -0
- tolokaforge-0.2.0/tolokaforge/runner/db_client.py +696 -0
- tolokaforge-0.2.0/tolokaforge/runner/db_proxy.py +727 -0
- tolokaforge-0.2.0/tolokaforge/runner/grading.py +841 -0
- tolokaforge-0.2.0/tolokaforge/runner/models.py +690 -0
- tolokaforge-0.2.0/tolokaforge/runner/rag_client.py +540 -0
- tolokaforge-0.2.0/tolokaforge/runner/runner.proto +333 -0
- tolokaforge-0.2.0/tolokaforge/runner/runner_pb2.py +73 -0
- tolokaforge-0.2.0/tolokaforge/runner/runner_pb2_grpc.py +407 -0
- tolokaforge-0.2.0/tolokaforge/runner/service.py +1872 -0
- tolokaforge-0.2.0/tolokaforge/runner/tool_factory.py +1656 -0
- tolokaforge-0.2.0/tolokaforge/secrets/__init__.py +50 -0
- tolokaforge-0.2.0/tolokaforge/secrets/config.py +154 -0
- tolokaforge-0.2.0/tolokaforge/secrets/log_filter.py +134 -0
- tolokaforge-0.2.0/tolokaforge/secrets/manager.py +400 -0
- tolokaforge-0.2.0/tolokaforge/secrets/providers.py +360 -0
- tolokaforge-0.2.0/tolokaforge/tools/__init__.py +1 -0
- tolokaforge-0.2.0/tolokaforge/tools/builtin/__init__.py +47 -0
- tolokaforge-0.2.0/tolokaforge/tools/builtin/bash.py +124 -0
- tolokaforge-0.2.0/tolokaforge/tools/builtin/browser.py +1273 -0
- tolokaforge-0.2.0/tolokaforge/tools/builtin/calculator.py +102 -0
- tolokaforge-0.2.0/tolokaforge/tools/builtin/db_json.py +286 -0
- tolokaforge-0.2.0/tolokaforge/tools/builtin/files.py +731 -0
- tolokaforge-0.2.0/tolokaforge/tools/builtin/http_request.py +163 -0
- tolokaforge-0.2.0/tolokaforge/tools/builtin/mobile.py +235 -0
- tolokaforge-0.2.0/tolokaforge/tools/builtin/rag_search.py +116 -0
- tolokaforge-0.2.0/tolokaforge/tools/builtin/registry.py +125 -0
- tolokaforge-0.2.0/tolokaforge/tools/registry.py +449 -0
- tolokaforge-0.2.0/tolokaforge/tools/user_tools.py +379 -0
- tolokaforge-0.2.0/tools/AGENTS.md +33 -0
- tolokaforge-0.2.0/tools/dev-mcp/README.md +47 -0
- tolokaforge-0.2.0/tools/dev-mcp/pyproject.toml +16 -0
- tolokaforge-0.2.0/tools/dev-mcp/src/dev_mcp/__init__.py +0 -0
- tolokaforge-0.2.0/tools/dev-mcp/src/dev_mcp/server.py +375 -0
- tolokaforge-0.2.0/tools/dev-mcp/src/dev_mcp/subprocess_utils.py +182 -0
- tolokaforge-0.2.0/tools/dev-mcp/tests/__init__.py +0 -0
- tolokaforge-0.2.0/tools/dev-mcp/tests/test_server.py +177 -0
- tolokaforge-0.2.0/tools/pricing-updater/README.md +33 -0
- tolokaforge-0.2.0/tools/pricing-updater/pyproject.toml +18 -0
- tolokaforge-0.2.0/tools/pricing-updater/src/pricing_updater/__init__.py +1 -0
- tolokaforge-0.2.0/tools/pricing-updater/src/pricing_updater/__main__.py +5 -0
- tolokaforge-0.2.0/tools/pricing-updater/src/pricing_updater/cli.py +145 -0
- tolokaforge-0.2.0/tools/pricing-updater/src/pricing_updater/fetcher.py +232 -0
- tolokaforge-0.2.0/tools/pricing-updater/tests/__init__.py +0 -0
- tolokaforge-0.2.0/tools/pricing-updater/tests/test_fetcher.py +636 -0
- tolokaforge-0.2.0/uv.lock +4365 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Cursor settings — all project rules are in AGENTS.md
|
|
3
|
+
alwaysApply: true
|
|
4
|
+
---
|
|
5
|
+
# Cursor Configuration
|
|
6
|
+
|
|
7
|
+
All project rules, conventions, and commands are defined in `AGENTS.md` at the repository root.
|
|
8
|
+
Read it first — it is the single source of truth for all AI agents.
|
|
9
|
+
|
|
10
|
+
## Session Startup
|
|
11
|
+
|
|
12
|
+
1. Read `README.md` and `.vscode/tasks.json` before writing any code
|
|
13
|
+
2. Do not ask for permission to read these files — they are essential context
|
|
14
|
+
|
|
15
|
+
## Cursor-Specific
|
|
16
|
+
|
|
17
|
+
- Use Context7 MCP (if available) to look up library/framework documentation before guessing at APIs
|
|
18
|
+
- Available VSCode tasks are in `.vscode/tasks.json`
|
|
19
|
+
- Plans and scratch files go in `plans/` directory — it is gitignored
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
*.egg-info/
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
*.egg
|
|
11
|
+
|
|
12
|
+
# Virtual environments
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
ENV/
|
|
16
|
+
.venv
|
|
17
|
+
|
|
18
|
+
# IDEs
|
|
19
|
+
.vscode/
|
|
20
|
+
.idea/
|
|
21
|
+
*.swp
|
|
22
|
+
*.swo
|
|
23
|
+
*~
|
|
24
|
+
|
|
25
|
+
# OS
|
|
26
|
+
.DS_Store
|
|
27
|
+
Thumbs.db
|
|
28
|
+
|
|
29
|
+
# Git
|
|
30
|
+
.git/
|
|
31
|
+
.gitignore
|
|
32
|
+
.gitattributes
|
|
33
|
+
|
|
34
|
+
# Documentation
|
|
35
|
+
*.md
|
|
36
|
+
!README.md
|
|
37
|
+
docs/
|
|
38
|
+
|
|
39
|
+
# Testing
|
|
40
|
+
.pytest_cache/
|
|
41
|
+
.coverage
|
|
42
|
+
htmlcov/
|
|
43
|
+
.tox/
|
|
44
|
+
|
|
45
|
+
# Results and outputs
|
|
46
|
+
results/
|
|
47
|
+
*.log
|
|
48
|
+
*.json
|
|
49
|
+
*.csv
|
|
50
|
+
*.html
|
|
51
|
+
|
|
52
|
+
# CI/CD
|
|
53
|
+
.github/
|
|
54
|
+
.gitlab-ci.yml
|
|
55
|
+
|
|
56
|
+
# Docker
|
|
57
|
+
docker-compose.override.yml
|
|
58
|
+
.dockerignore
|
|
59
|
+
|
|
60
|
+
# Temporary files
|
|
61
|
+
tmp/
|
|
62
|
+
temp/
|
|
63
|
+
*.tmp
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Environment variables for Tolokaforge
|
|
2
|
+
# Copy this file to .env and fill in your API keys:
|
|
3
|
+
# cp .env.example .env
|
|
4
|
+
|
|
5
|
+
# At least one LLM provider key is required.
|
|
6
|
+
# Most examples use OpenRouter by default.
|
|
7
|
+
OPENROUTER_API_KEY=your-openrouter-api-key-here
|
|
8
|
+
|
|
9
|
+
# Optional: direct provider keys (used when provider is set to "anthropic", "openai", etc.)
|
|
10
|
+
# ANTHROPIC_API_KEY=your-anthropic-api-key-here
|
|
11
|
+
# OPENAI_API_KEY=your-openai-api-key-here
|
|
12
|
+
# GOOGLE_API_KEY=your-google-api-key-here
|
|
13
|
+
|
|
14
|
+
# Gemini direct (Google AI Studio): use when provider=gemini in ModelConfig.
|
|
15
|
+
# Create at https://aistudio.google.com (no GCP IAM required; key is a bearer token).
|
|
16
|
+
# Bypasses OpenRouter — useful for distinguishing model behavior from provider-transport effects.
|
|
17
|
+
# GEMINI_API_KEY=your-gemini-studio-api-key-here
|
|
18
|
+
|
|
19
|
+
# Vertex AI (GCP): use when provider=vertex_ai in ModelConfig.
|
|
20
|
+
# The service account needs the roles/aiplatform.user role (or at least the aiplatform.endpoints.predict permission).
|
|
21
|
+
# Download a JSON key and point GOOGLE_APPLICATION_CREDENTIALS at it.
|
|
22
|
+
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
|
|
23
|
+
# VERTEXAI_PROJECT=your-gcp-project-id
|
|
24
|
+
# VERTEXAI_LOCATION=us-central1
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
# Trajectory output files (large JSON/YAML from benchmark runs)
|
|
2
|
+
tests/data/projects/*/output/**/*.json filter=lfs diff=lfs merge=lfs -text
|
|
3
|
+
tests/data/projects/*/output/**/*.yaml filter=lfs diff=lfs merge=lfs -text
|
|
4
|
+
|
|
5
|
+
# Mobile task screenshots (binary PNGs)
|
|
6
|
+
tasks/mobile/_assets/images/*.png filter=lfs diff=lfs merge=lfs -text
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
paths:
|
|
8
|
+
- 'tolokaforge/**'
|
|
9
|
+
- 'tests/**'
|
|
10
|
+
- 'scripts/**'
|
|
11
|
+
- 'tools/**'
|
|
12
|
+
- 'pyproject.toml'
|
|
13
|
+
- 'uv.lock'
|
|
14
|
+
- '.github/workflows/ci.yml'
|
|
15
|
+
- '.pre-commit-config.yaml'
|
|
16
|
+
pull_request:
|
|
17
|
+
types: [opened, synchronize, reopened, labeled]
|
|
18
|
+
paths:
|
|
19
|
+
- 'tolokaforge/**'
|
|
20
|
+
- 'tests/**'
|
|
21
|
+
- 'scripts/**'
|
|
22
|
+
- 'tools/**'
|
|
23
|
+
- 'pyproject.toml'
|
|
24
|
+
- 'uv.lock'
|
|
25
|
+
- '.github/workflows/ci.yml'
|
|
26
|
+
- '.pre-commit-config.yaml'
|
|
27
|
+
schedule:
|
|
28
|
+
- cron: '0 6 * * *'
|
|
29
|
+
|
|
30
|
+
permissions:
|
|
31
|
+
contents: read
|
|
32
|
+
|
|
33
|
+
jobs:
|
|
34
|
+
# ============================================================================
|
|
35
|
+
# Lint job - Fast feedback on code quality
|
|
36
|
+
# ============================================================================
|
|
37
|
+
lint:
|
|
38
|
+
runs-on: ubuntu-latest
|
|
39
|
+
timeout-minutes: 20
|
|
40
|
+
|
|
41
|
+
steps:
|
|
42
|
+
- uses: actions/checkout@v4
|
|
43
|
+
|
|
44
|
+
- name: Install uv
|
|
45
|
+
uses: astral-sh/setup-uv@v4
|
|
46
|
+
with:
|
|
47
|
+
version: "latest"
|
|
48
|
+
enable-cache: true
|
|
49
|
+
cache-dependency-glob: "uv.lock"
|
|
50
|
+
|
|
51
|
+
- name: Set up Python 3.12
|
|
52
|
+
run: uv python install 3.12
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
- name: Install dependencies
|
|
56
|
+
run: uv sync --dev
|
|
57
|
+
|
|
58
|
+
- name: Run pre-commit checks
|
|
59
|
+
run: uv run pre-commit run --all-files --show-diff-on-failure
|
|
60
|
+
|
|
61
|
+
# ============================================================================
|
|
62
|
+
# PR smoke tests - fast required gate
|
|
63
|
+
# ============================================================================
|
|
64
|
+
test-smoke:
|
|
65
|
+
runs-on: ubuntu-latest
|
|
66
|
+
if: github.event_name == 'pull_request'
|
|
67
|
+
needs: lint # Only run tests if linting passes
|
|
68
|
+
timeout-minutes: 45
|
|
69
|
+
|
|
70
|
+
steps:
|
|
71
|
+
- uses: actions/checkout@v4
|
|
72
|
+
with:
|
|
73
|
+
token: ${{ github.token }}
|
|
74
|
+
lfs: true
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
- name: Install uv
|
|
78
|
+
uses: astral-sh/setup-uv@v4
|
|
79
|
+
with:
|
|
80
|
+
version: "latest"
|
|
81
|
+
enable-cache: true
|
|
82
|
+
cache-dependency-glob: "uv.lock"
|
|
83
|
+
|
|
84
|
+
- name: Set up Python 3.12
|
|
85
|
+
run: uv python install 3.12
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
- name: Install dependencies
|
|
89
|
+
run: |
|
|
90
|
+
uv sync --dev
|
|
91
|
+
|
|
92
|
+
- name: Install Playwright browsers
|
|
93
|
+
run: |
|
|
94
|
+
uv run python -m playwright install chromium
|
|
95
|
+
|
|
96
|
+
- name: Create .env file
|
|
97
|
+
run: |
|
|
98
|
+
echo "OPENROUTER_API_KEY=${{ secrets.OPENROUTER_TESTING_TOKEN }}" > .env
|
|
99
|
+
|
|
100
|
+
- name: Run unit + canonical tests with coverage
|
|
101
|
+
run: |
|
|
102
|
+
uv run pytest tests/unit/ tests/canonical/ -v --tb=short \
|
|
103
|
+
--cov=tolokaforge --cov-report=term-missing --cov-fail-under=60
|
|
104
|
+
|
|
105
|
+
- name: Run tool tests
|
|
106
|
+
run: |
|
|
107
|
+
for tool_dir in tools/*/; do
|
|
108
|
+
if [ -d "${tool_dir}tests" ]; then
|
|
109
|
+
echo "=== Testing ${tool_dir} ==="
|
|
110
|
+
(cd "${tool_dir}" && uv run pytest tests/ -v --tb=short -p no:cacheprovider) || exit 1
|
|
111
|
+
fi
|
|
112
|
+
done
|
|
113
|
+
|
|
114
|
+
- name: Validate public example suites
|
|
115
|
+
run: |
|
|
116
|
+
uv run tolokaforge validate --tasks "examples/native/*/dataset/**/task.yaml"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
- name: Docker task-pack mount smoke
|
|
120
|
+
run: |
|
|
121
|
+
scripts/tests/task_pack_docker_smoke.sh
|
|
122
|
+
|
|
123
|
+
# TODO: Re-enable once an in-process runtime or a Docker-less orchestrator is available.
|
|
124
|
+
# The smoke script requires `tolokaforge run`, which now needs a Docker runtime.
|
|
125
|
+
# - name: Run public examples end-to-end (mock smoke)
|
|
126
|
+
# run: |
|
|
127
|
+
# scripts/tests/run_public_examples_smoke.sh
|
|
128
|
+
#
|
|
129
|
+
# - name: Summarize public examples
|
|
130
|
+
# run: |
|
|
131
|
+
# uv run python scripts/tests/summarize_public_examples.py --min-pass-rate 0.0 --min-completion-rate 1.0
|
|
132
|
+
#
|
|
133
|
+
# - name: Upload public example summary artifact
|
|
134
|
+
# uses: actions/upload-artifact@v4
|
|
135
|
+
# with:
|
|
136
|
+
# name: public-example-summary-pr
|
|
137
|
+
# path: |
|
|
138
|
+
# output/public_examples_summary.json
|
|
139
|
+
# output/public_examples_summary.md
|
|
140
|
+
|
|
141
|
+
# ============================================================================
|
|
142
|
+
# Full test suite - push + nightly confidence
|
|
143
|
+
# ============================================================================
|
|
144
|
+
test-full:
|
|
145
|
+
runs-on: ubuntu-latest
|
|
146
|
+
if: github.event_name != 'pull_request'
|
|
147
|
+
needs: [lint]
|
|
148
|
+
timeout-minutes: 120
|
|
149
|
+
|
|
150
|
+
steps:
|
|
151
|
+
- uses: actions/checkout@v4
|
|
152
|
+
with:
|
|
153
|
+
token: ${{ github.token }}
|
|
154
|
+
lfs: true
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
- name: Install uv
|
|
158
|
+
uses: astral-sh/setup-uv@v4
|
|
159
|
+
with:
|
|
160
|
+
version: "latest"
|
|
161
|
+
enable-cache: true
|
|
162
|
+
cache-dependency-glob: "uv.lock"
|
|
163
|
+
|
|
164
|
+
- name: Set up Python 3.12
|
|
165
|
+
run: uv python install 3.12
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
- name: Install dependencies
|
|
169
|
+
run: |
|
|
170
|
+
uv sync --dev
|
|
171
|
+
|
|
172
|
+
- name: Install Playwright browsers
|
|
173
|
+
run: |
|
|
174
|
+
uv run python -m playwright install chromium
|
|
175
|
+
|
|
176
|
+
- name: Create .env file
|
|
177
|
+
run: |
|
|
178
|
+
echo "OPENROUTER_API_KEY=${{ secrets.OPENROUTER_TESTING_TOKEN }}" > .env
|
|
179
|
+
|
|
180
|
+
- name: Validate public example suites
|
|
181
|
+
run: |
|
|
182
|
+
uv run tolokaforge validate --tasks "examples/native/*/dataset/**/task.yaml"
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
- name: Docker task-pack mount smoke
|
|
186
|
+
run: |
|
|
187
|
+
scripts/tests/task_pack_docker_smoke.sh
|
|
188
|
+
|
|
189
|
+
# TODO: Re-enable once an in-process runtime or a Docker-less orchestrator is available (the smoke script requires `tolokaforge run`, which now needs a Docker runtime).
|
|
190
|
+
# - name: Run public examples end-to-end (mock smoke)
|
|
191
|
+
# run: |
|
|
192
|
+
# scripts/tests/run_public_examples_smoke.sh
|
|
193
|
+
#
|
|
194
|
+
# - name: Summarize public examples
|
|
195
|
+
# run: |
|
|
196
|
+
# uv run python scripts/tests/summarize_public_examples.py --min-pass-rate 0.0 --min-completion-rate 1.0
|
|
197
|
+
|
|
198
|
+
- name: Run unit + canonical tests with coverage
|
|
199
|
+
run: |
|
|
200
|
+
uv run pytest tests/unit/ tests/canonical/ -v --tb=short \
|
|
201
|
+
--cov=tolokaforge --cov-report=term-missing --cov-fail-under=60
|
|
202
|
+
|
|
203
|
+
- name: Run tool tests
|
|
204
|
+
run: |
|
|
205
|
+
for tool_dir in tools/*/; do
|
|
206
|
+
if [ -d "${tool_dir}tests" ]; then
|
|
207
|
+
echo "=== Testing ${tool_dir} ==="
|
|
208
|
+
(cd "${tool_dir}" && uv run pytest tests/ -v --tb=short -p no:cacheprovider) || exit 1
|
|
209
|
+
fi
|
|
210
|
+
done
|
|
211
|
+
|
|
212
|
+
- name: Build Docker images for integration tests
|
|
213
|
+
if: github.event_name == 'schedule' || github.event_name == 'push'
|
|
214
|
+
id: docker-build-full
|
|
215
|
+
run: |
|
|
216
|
+
uv run tolokaforge docker build --core
|
|
217
|
+
|
|
218
|
+
- name: Run integration tests
|
|
219
|
+
if: (github.event_name == 'schedule' || github.event_name == 'push') && steps.docker-build-full.outcome == 'success'
|
|
220
|
+
run: |
|
|
221
|
+
uv run pytest tests/integration/ -v --tb=short
|
|
222
|
+
|
|
223
|
+
- name: Generate coverage XML for Codecov
|
|
224
|
+
if: github.event_name == 'schedule'
|
|
225
|
+
run: |
|
|
226
|
+
uv run coverage xml
|
|
227
|
+
|
|
228
|
+
- name: Upload coverage to Codecov
|
|
229
|
+
if: github.event_name == 'schedule'
|
|
230
|
+
uses: codecov/codecov-action@v3
|
|
231
|
+
with:
|
|
232
|
+
files: ./coverage.xml
|
|
233
|
+
flags: unittests
|
|
234
|
+
name: codecov-umbrella
|
|
235
|
+
continue-on-error: true
|
|
236
|
+
|
|
237
|
+
# ============================================================================
|
|
238
|
+
# Label-triggered gate - full suite + coverage (add 'ready-to-merge' label)
|
|
239
|
+
# ============================================================================
|
|
240
|
+
test-gate:
  # Full suite + coverage for pull requests that carry the 'ready-to-merge'
  # label. Starts only after the `lint` job.
  runs-on: ubuntu-latest
  if: >-
    github.event_name == 'pull_request' &&
    contains(github.event.pull_request.labels.*.name, 'ready-to-merge')
  needs: [lint]
  timeout-minutes: 120

  steps:
    - uses: actions/checkout@v4
      with:
        token: ${{ github.token }}
        lfs: true

    - name: Install uv
      uses: astral-sh/setup-uv@v4
      with:
        version: "latest"
        enable-cache: true
        cache-dependency-glob: "uv.lock"

    - name: Set up Python 3.12
      run: uv python install 3.12

    - name: Install dependencies
      run: |
        uv sync --dev

    - name: Install Playwright browsers
      run: |
        uv run python -m playwright install chromium

    # The secret is handed to the script through the step environment rather
    # than being interpolated into the script text, so quotes, `$` or
    # backticks in the value cannot change what the shell executes.
    - name: Create .env file
      env:
        OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_TESTING_TOKEN }}
      run: |
        printf 'OPENROUTER_API_KEY=%s\n' "$OPENROUTER_API_KEY" > .env

    - name: Validate public example suites
      run: |
        uv run tolokaforge validate --tasks "examples/native/*/dataset/**/task.yaml"

    - name: Docker task-pack mount smoke
      run: |
        scripts/tests/task_pack_docker_smoke.sh

    # Writes coverage.xml (uploaded below) and fails when coverage of the
    # tolokaforge package is below 60%.
    - name: Run unit + canonical tests with coverage
      run: |
        uv run pytest tests/unit/ tests/canonical/ -v --tb=short \
          --cov=tolokaforge --cov-report=xml --cov-report=term-missing \
          --cov-fail-under=60

    - name: Build Docker images for integration tests
      id: docker-build
      run: |
        uv run tolokaforge docker build --core

    - name: Run integration tests
      if: steps.docker-build.outcome == 'success'
      run: |
        uv run pytest tests/integration/ -v --tb=short

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v3
      with:
        files: ./coverage.xml
        flags: merge-gate
        name: codecov-merge-gate
      # NOTE(review): assumed to be a step-level key (an upload failure does
      # not fail the gate); the original nesting is not visible here - confirm.
      continue-on-error: true
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
name: Claude Code PR Review

# Runs on pull-request activity, and on comments that mention '@claude'.
on:
  pull_request:
    types: [opened, synchronize, reopened]
  issue_comment:
    types: [created]
  pull_request_review_comment:
    types: [created]

jobs:
  hygiene-review:
    runs-on: ubuntu-latest
    # `issue_comment` is delivered for plain issues as well as pull requests.
    # `github.event.issue.pull_request` is only set for comments made on a
    # pull request, so '@claude' on an ordinary issue no longer starts a PR
    # review that has no PR to look at.
    if: >-
      github.event_name == 'pull_request' ||
      (github.event_name == 'issue_comment' &&
      github.event.issue.pull_request &&
      contains(github.event.comment.body, '@claude')) ||
      (github.event_name == 'pull_request_review_comment' &&
      contains(github.event.comment.body, '@claude'))
    permissions:
      contents: read
      pull-requests: write
      issues: write
      id-token: write
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # The review checklist is passed to the action as a fixed prompt.
      - uses: anthropics/claude-code-action@beta
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          direct_prompt: |
            Review this PR against the project's AGENTS.md rules. Flag any
            violation with file path, line number, and a concrete suggested
            fix. If everything checks out, say so explicitly.

            1. CODE QUALITY: Errors are surfaced explicitly. No silent
               fallbacks, no swallowed exceptions (`except Exception: pass`),
               no returning `None` instead of raising. Functions under 100
               lines, nesting depth < 3.

            2. SECRETS — single abstraction (HARD RULE): API keys, DB
               credentials, tokens, signing keys, and any other credential
               are accessed *only* via `SecretManager`
               (`tolokaforge.secrets`). Flag any new `os.environ.get` /
               `os.getenv` for credentials, any `load_dotenv()` call, any
               `from dotenv import` outside `tolokaforge.secrets`, any
               direct `.env` / `.netrc` / `.aws/credentials` file read,
               any one-off env-var helper, any secret baked into a Docker
               image / build-arg / mount / image-tag. New secret backends
               must ship as new `SecretProvider` subclasses, never as
               ad-hoc call sites. The
               `tests/unit/secrets/test_no_raw_secret_access.py`
               enforcement test must remain green.

            3. TESTING: Every new test file has a `pytestmark` marker
               (`pytest.mark.unit`, `.canonical`, or `.integration`).
               Zero `xfail`, zero bare `@skip` — use conditional markers
               (`requires_api`, `requires_docker`). MockAsyncClient comes
               from `tests.utils.mock_clients` only — no local copies.

            4. ARCHITECTURE: Harness logic stays generic. Task-specific
               logic lives in task packs only. Clean abstraction
               boundaries. Backward compatibility for task contracts.

            5. REPOSITORY HYGIENE: No scripts, data, temp files, or logs
               in the repo root. Documentation updated when user-facing
               behaviour changes.

            6. CODE STANDARDS: DRY — no duplicated logic. Self-describing
               names. Early returns to minimise nesting. No warnings
               suppressed.

            7. ROOT CLEANLINESS: New files in the repo root must be on
               this allow-list — README.md, LICENSE, CHANGELOG.md,
               CONTRIBUTING.md, CONTRIBUTORS.md, CITATION.*, CLAUDE.md,
               AGENTS.md, pyproject.toml, uv.lock, Makefile,
               docker-compose.yaml, and dotfiles (.gitignore,
               .pre-commit-config.yaml, etc.).

            8. NO TEMP ARTIFACTS: No temporary plans, log files, JSON
               data dumps, or build outputs.

            9. SCRIPT LOCATION: Bash scripts only in scripts/. Exceptions:
               tests/ for test helpers, tasks/ for benchmark data,
               .devcontainer/ for container setup, Docker entrypoints
               alongside Dockerfiles.

            10. SCRIPTS ORGANIZATION: New scripts placed in the correct
                scripts/ subdirectory (benchmark/, setup/, lint/, tests/,
                release/, analysis/).

            11. PYTHON TOOLS: Complex Python tools in tools/ as uv
                workspace members, linked in
                `pyproject.toml [tool.uv.workspace]`.

            12. NO PROJECT-SPECIFIC CONTENT ON MAIN: No domain-specific
                configs or runner scripts on the `main` branch.
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
name: Publish tolokaforge-adapter-terminal-bench to PyPI

# Triggered by pushing an `adapter-terminal-bench-v*` tag, or manually with an
# explicit publish target.
on:
  push:
    tags: ["adapter-terminal-bench-v*"]
  workflow_dispatch:
    inputs:
      target:
        type: choice
        description: "Publish target"
        required: true
        options: [testpypi, pypi]

# One run per ref: a newer run for the same ref cancels the one in progress.
concurrency:
  cancel-in-progress: true
  group: publish-adapter-terminal-bench-${{ github.ref }}
|
|
21
|
+
|
|
22
|
+
jobs:
|
|
23
|
+
build:
  # Builds the adapter package with uv, prints what ended up in the wheel(s),
  # and hands dist/ to the publish jobs as an artifact.
  name: Build distribution
  runs-on: ubuntu-latest
  permissions:
    contents: read
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Install uv
      uses: astral-sh/setup-uv@v7

    - name: Install Python
      run: uv python install 3.12

    - name: Build package
      run: uv build --package tolokaforge-adapter-terminal-bench

    # Informational listing only: built files, then the sorted member names
    # of every wheel in dist/.
    - name: Verify package contents
      run: |
        echo "=== Built artifacts ==="
        ls -lh dist/
        echo ""
        echo "=== Wheel contents ==="
        uv run python -c "
        import pathlib
        import zipfile
        for wheel in pathlib.Path('dist').glob('*.whl'):
            print(f'--- {wheel.name} ---')
            with zipfile.ZipFile(wheel) as archive:
                for member in sorted(archive.namelist()):
                    print(f' {member}')
        "

    # An empty dist/ is treated as an error rather than producing an empty
    # artifact.
    - name: Upload distribution artifacts
      uses: actions/upload-artifact@v4
      with:
        name: adapter-terminal-bench-dist
        path: dist/
        if-no-files-found: error
|
|
63
|
+
|
|
64
|
+
publish-testpypi:
  # Manual-only path: runs when the workflow is dispatched with
  # target == 'testpypi'.
  name: Publish to TestPyPI
  runs-on: ubuntu-latest
  needs: build
  if: >-
    github.event_name == 'workflow_dispatch'
    && github.event.inputs.target == 'testpypi'
  environment: testpypi
  permissions:
    # The job may request an OIDC token; uv is invoked with
    # `--trusted-publishing always`.
    id-token: write
  steps:
    - name: Install uv
      uses: astral-sh/setup-uv@v7

    - name: Download distribution artifacts
      uses: actions/download-artifact@v4
      with:
        path: dist/
        name: adapter-terminal-bench-dist

    - name: List downloaded files
      run: find dist/ -type f

    - name: Publish to TestPyPI
      run: >-
        uv publish dist/*
        --publish-url https://test.pypi.org/legacy/
        --trusted-publishing always
|
|
87
|
+
|
|
88
|
+
publish-pypi:
  # Runs for a pushed `adapter-terminal-bench-v*` tag, or for a manual
  # dispatch with target == 'pypi'. The parentheses only spell out the
  # grouping that `&&` binding tighter than `||` already gives.
  name: Publish to PyPI
  runs-on: ubuntu-latest
  needs: build
  if: >-
    (github.event_name == 'push'
    && startsWith(github.ref, 'refs/tags/adapter-terminal-bench-v'))
    || (github.event_name == 'workflow_dispatch'
    && github.event.inputs.target == 'pypi')
  environment: release
  permissions:
    # The job may request an OIDC token; uv is invoked with
    # `--trusted-publishing always`.
    id-token: write
  steps:
    - name: Install uv
      uses: astral-sh/setup-uv@v7

    - name: Download distribution artifacts
      uses: actions/download-artifact@v4
      with:
        path: dist/
        name: adapter-terminal-bench-dist

    - name: List downloaded files
      run: find dist/ -type f

    - name: Publish to PyPI
      run: >-
        uv publish dist/*
        --trusted-publishing always
|
|
111
|
+
|
|
112
|
+
github-release:
  # Tag pushes only: after the PyPI publish job, attach the built files to a
  # GitHub Release with generated notes.
  name: Create GitHub Release
  runs-on: ubuntu-latest
  needs: publish-pypi
  if: >-
    github.event_name == 'push'
    && startsWith(github.ref, 'refs/tags/adapter-terminal-bench-v')
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v4

    - name: Download distribution artifacts
      uses: actions/download-artifact@v4
      with:
        path: dist/
        name: adapter-terminal-bench-dist

    - name: Create GitHub Release
      uses: softprops/action-gh-release@v2
      with:
        files: 'dist/*'
        generate_release_notes: true
|