solace-agent-mesh 1.6.1__py3-none-any.whl → 1.13.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of solace-agent-mesh might be problematic. Click here for more details.
- solace_agent_mesh/agent/adk/alembic/README +74 -0
- solace_agent_mesh/agent/adk/alembic/env.py +77 -0
- solace_agent_mesh/agent/adk/alembic/script.py.mako +28 -0
- solace_agent_mesh/agent/adk/alembic/versions/e2902798564d_adk_session_db_upgrade.py +52 -0
- solace_agent_mesh/agent/adk/alembic.ini +112 -0
- solace_agent_mesh/agent/adk/app_llm_agent.py +26 -0
- solace_agent_mesh/agent/adk/artifacts/filesystem_artifact_service.py +165 -1
- solace_agent_mesh/agent/adk/artifacts/s3_artifact_service.py +163 -0
- solace_agent_mesh/agent/adk/callbacks.py +852 -109
- solace_agent_mesh/agent/adk/embed_resolving_mcp_toolset.py +234 -36
- solace_agent_mesh/agent/adk/intelligent_mcp_callbacks.py +52 -5
- solace_agent_mesh/agent/adk/mcp_content_processor.py +1 -1
- solace_agent_mesh/agent/adk/models/lite_llm.py +77 -21
- solace_agent_mesh/agent/adk/models/oauth2_token_manager.py +24 -137
- solace_agent_mesh/agent/adk/runner.py +85 -20
- solace_agent_mesh/agent/adk/schema_migration.py +88 -0
- solace_agent_mesh/agent/adk/services.py +94 -18
- solace_agent_mesh/agent/adk/setup.py +281 -65
- solace_agent_mesh/agent/adk/stream_parser.py +231 -37
- solace_agent_mesh/agent/adk/tool_wrapper.py +3 -0
- solace_agent_mesh/agent/protocol/event_handlers.py +472 -137
- solace_agent_mesh/agent/proxies/a2a/app.py +3 -2
- solace_agent_mesh/agent/proxies/a2a/component.py +572 -75
- solace_agent_mesh/agent/proxies/a2a/config.py +80 -4
- solace_agent_mesh/agent/proxies/base/app.py +3 -2
- solace_agent_mesh/agent/proxies/base/component.py +188 -22
- solace_agent_mesh/agent/proxies/base/proxy_task_context.py +3 -1
- solace_agent_mesh/agent/sac/app.py +91 -3
- solace_agent_mesh/agent/sac/component.py +591 -157
- solace_agent_mesh/agent/sac/patch_adk.py +8 -16
- solace_agent_mesh/agent/sac/task_execution_context.py +146 -4
- solace_agent_mesh/agent/tools/__init__.py +3 -0
- solace_agent_mesh/agent/tools/audio_tools.py +3 -3
- solace_agent_mesh/agent/tools/builtin_artifact_tools.py +710 -171
- solace_agent_mesh/agent/tools/deep_research_tools.py +2161 -0
- solace_agent_mesh/agent/tools/dynamic_tool.py +2 -0
- solace_agent_mesh/agent/tools/peer_agent_tool.py +82 -15
- solace_agent_mesh/agent/tools/time_tools.py +126 -0
- solace_agent_mesh/agent/tools/tool_config_types.py +57 -2
- solace_agent_mesh/agent/tools/web_search_tools.py +279 -0
- solace_agent_mesh/agent/tools/web_tools.py +125 -17
- solace_agent_mesh/agent/utils/artifact_helpers.py +248 -6
- solace_agent_mesh/agent/utils/context_helpers.py +17 -0
- solace_agent_mesh/assets/docs/404.html +6 -6
- solace_agent_mesh/assets/docs/assets/css/{styles.906a1503.css → styles.8162edfb.css} +1 -1
- solace_agent_mesh/assets/docs/assets/js/05749d90.19ac4f35.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/15ba94aa.e186750d.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/15e40e79.434bb30f.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/17896441.e612dfb4.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/2279.550aa580.js +2 -0
- solace_agent_mesh/assets/docs/assets/js/{17896441.a5e82f9b.js.LICENSE.txt → 2279.550aa580.js.LICENSE.txt} +6 -0
- solace_agent_mesh/assets/docs/assets/js/240a0364.83e37aa8.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/2987107d.a80604f9.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/2e32b5e0.2f0db237.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/3a6c6137.7e61915d.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/3ac1795d.7f7ab1c1.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/3ff0015d.e53c9b78.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/41adc471.0e95b87c.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/4667dc50.bf2ad456.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/49eed117.493d6f99.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/{509e993c.4c7a1a6d.js → 509e993c.a1fbf45a.js} +1 -1
- solace_agent_mesh/assets/docs/assets/js/547e15cc.8e6da617.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/55b7b518.29d6e75d.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/5b8d9c11.d4eb37b8.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/5c2bd65f.1ee87753.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/60702c0e.a8bdd79b.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/631738c7.fa471607.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/64195356.09dbd087.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/66d4869e.30340bd3.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/6a520c9d.b6e3f2ce.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/6aaedf65.7253541d.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/6ad8f0bd.a5b36a60.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/6d84eae0.fd23ba4a.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/71da7b71.374b9d54.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/729898df.7249e9fd.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/7e294c01.7c5f6906.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/8024126c.e3467286.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/81a99df0.7ed65d45.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/82fbfb93.161823a5.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/8b032486.91a91afc.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/924ffdeb.975e428a.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/94e8668d.16083b3f.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/9bb13469.4523ae20.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/a7d42657.a956689d.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/a94703ab.3e5fbcb3.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/ab9708a8.3e563275.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/ad87452a.9d73dad6.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/c93cbaa0.0e0d8baf.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/cab03b5b.6a073091.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/cbe2e9ea.07e170dd.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/da0b5bad.b62f7b08.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/dd817ffc.c37a755e.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/dd81e2b8.b682e9c2.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/de915948.44a432bc.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/e04b235d.06d23db6.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/e1b6eeb4.deb2b62e.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/e3d9abda.1476f570.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/e6f9706b.acc800d3.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/e92d0134.c147a429.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/ee0c2fe7.94d0a351.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/f284c35a.cc97854c.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/ff4d71f2.74710fc1.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/main.d634009f.js +2 -0
- solace_agent_mesh/assets/docs/assets/js/runtime~main.27bb82a7.js +1 -0
- solace_agent_mesh/assets/docs/docs/documentation/components/agents/index.html +68 -68
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/artifact-management/index.html +50 -50
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/audio-tools/index.html +42 -42
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/data-analysis-tools/index.html +55 -55
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/embeds/index.html +82 -68
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/image-tools/index.html +81 -0
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/index.html +67 -50
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/research-tools/index.html +136 -0
- solace_agent_mesh/assets/docs/docs/documentation/components/cli/index.html +178 -144
- solace_agent_mesh/assets/docs/docs/documentation/components/gateways/index.html +43 -42
- solace_agent_mesh/assets/docs/docs/documentation/components/index.html +20 -18
- solace_agent_mesh/assets/docs/docs/documentation/components/orchestrator/index.html +23 -23
- solace_agent_mesh/assets/docs/docs/documentation/components/platform-service/index.html +33 -0
- solace_agent_mesh/assets/docs/docs/documentation/components/plugins/index.html +45 -45
- solace_agent_mesh/assets/docs/docs/documentation/components/projects/index.html +182 -0
- solace_agent_mesh/assets/docs/docs/documentation/components/prompts/index.html +147 -0
- solace_agent_mesh/assets/docs/docs/documentation/components/proxies/index.html +208 -125
- solace_agent_mesh/assets/docs/docs/documentation/components/speech/index.html +52 -0
- solace_agent_mesh/assets/docs/docs/documentation/deploying/debugging/index.html +28 -49
- solace_agent_mesh/assets/docs/docs/documentation/deploying/deployment-options/index.html +29 -30
- solace_agent_mesh/assets/docs/docs/documentation/deploying/index.html +14 -14
- solace_agent_mesh/assets/docs/docs/documentation/deploying/kubernetes/index.html +47 -0
- solace_agent_mesh/assets/docs/docs/documentation/deploying/kubernetes/kubernetes-deployment-guide/index.html +197 -0
- solace_agent_mesh/assets/docs/docs/documentation/deploying/logging/index.html +90 -0
- solace_agent_mesh/assets/docs/docs/documentation/deploying/observability/index.html +17 -16
- solace_agent_mesh/assets/docs/docs/documentation/deploying/proxy_configuration/index.html +49 -0
- solace_agent_mesh/assets/docs/docs/documentation/developing/create-agents/index.html +38 -38
- solace_agent_mesh/assets/docs/docs/documentation/developing/create-gateways/index.html +162 -171
- solace_agent_mesh/assets/docs/docs/documentation/developing/creating-python-tools/index.html +67 -49
- solace_agent_mesh/assets/docs/docs/documentation/developing/creating-service-providers/index.html +17 -17
- solace_agent_mesh/assets/docs/docs/documentation/developing/evaluations/index.html +51 -51
- solace_agent_mesh/assets/docs/docs/documentation/developing/index.html +22 -22
- solace_agent_mesh/assets/docs/docs/documentation/developing/structure/index.html +27 -27
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/bedrock-agents/index.html +135 -135
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/custom-agent/index.html +66 -66
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/event-mesh-gateway/index.html +51 -51
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/mcp-integration/index.html +50 -38
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/mongodb-integration/index.html +86 -86
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/rag-integration/index.html +51 -51
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/rest-gateway/index.html +24 -24
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/slack-integration/index.html +30 -30
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/sql-database/index.html +44 -44
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/teams-integration/index.html +115 -0
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/agent-builder/index.html +86 -0
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/connectors/index.html +67 -0
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/index.html +23 -19
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/installation/index.html +40 -37
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/openapi-tools/index.html +324 -0
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/rbac-setup-guide/index.html +112 -87
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/secure-user-delegated-access/index.html +440 -0
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/single-sign-on/index.html +87 -64
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/wheel-installation/index.html +62 -0
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/architecture/index.html +44 -44
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/index.html +39 -37
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/introduction/index.html +30 -30
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/try-agent-mesh/index.html +18 -18
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/vibe_coding/index.html +62 -0
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/artifact-storage/index.html +311 -0
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/configurations/index.html +39 -42
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/index.html +14 -14
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/installation/index.html +27 -25
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/large_language_models/index.html +69 -69
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/run-project/index.html +72 -72
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/session-storage/index.html +251 -0
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/user-feedback/index.html +88 -0
- solace_agent_mesh/assets/docs/docs/documentation/migrations/a2a-upgrade/a2a-gateway-upgrade-to-0.3.0/index.html +42 -42
- solace_agent_mesh/assets/docs/docs/documentation/migrations/a2a-upgrade/a2a-technical-migration-map/index.html +20 -20
- solace_agent_mesh/assets/docs/docs/documentation/migrations/platform-service-split/index.html +85 -0
- solace_agent_mesh/assets/docs/lunr-index-1768329217460.json +1 -0
- solace_agent_mesh/assets/docs/lunr-index.json +1 -1
- solace_agent_mesh/assets/docs/search-doc-1768329217460.json +1 -0
- solace_agent_mesh/assets/docs/search-doc.json +1 -1
- solace_agent_mesh/assets/docs/sitemap.xml +1 -1
- solace_agent_mesh/cli/__init__.py +1 -1
- solace_agent_mesh/cli/commands/add_cmd/__init__.py +3 -1
- solace_agent_mesh/cli/commands/add_cmd/agent_cmd.py +6 -1
- solace_agent_mesh/cli/commands/add_cmd/proxy_cmd.py +100 -0
- solace_agent_mesh/cli/commands/docs_cmd.py +4 -1
- solace_agent_mesh/cli/commands/eval_cmd.py +1 -1
- solace_agent_mesh/cli/commands/init_cmd/__init__.py +15 -0
- solace_agent_mesh/cli/commands/init_cmd/directory_step.py +1 -1
- solace_agent_mesh/cli/commands/init_cmd/env_step.py +30 -3
- solace_agent_mesh/cli/commands/init_cmd/orchestrator_step.py +3 -4
- solace_agent_mesh/cli/commands/init_cmd/platform_service_step.py +85 -0
- solace_agent_mesh/cli/commands/init_cmd/webui_gateway_step.py +16 -3
- solace_agent_mesh/cli/commands/plugin_cmd/add_cmd.py +2 -1
- solace_agent_mesh/cli/commands/plugin_cmd/catalog_cmd.py +1 -0
- solace_agent_mesh/cli/commands/plugin_cmd/create_cmd.py +3 -3
- solace_agent_mesh/cli/commands/run_cmd.py +64 -49
- solace_agent_mesh/cli/commands/tools_cmd.py +315 -0
- solace_agent_mesh/cli/main.py +15 -0
- solace_agent_mesh/client/webui/frontend/static/assets/{authCallback-BTf6dqwp.js → authCallback-KnKMP_vb.js} +1 -1
- solace_agent_mesh/client/webui/frontend/static/assets/client-DpBL2stg.js +25 -0
- solace_agent_mesh/client/webui/frontend/static/assets/main-Cd498TV2.js +435 -0
- solace_agent_mesh/client/webui/frontend/static/assets/main-rSf8Vu29.css +1 -0
- solace_agent_mesh/client/webui/frontend/static/assets/vendor-CGk8Suyh.js +565 -0
- solace_agent_mesh/client/webui/frontend/static/auth-callback.html +3 -3
- solace_agent_mesh/client/webui/frontend/static/index.html +4 -4
- solace_agent_mesh/client/webui/frontend/static/mockServiceWorker.js +336 -0
- solace_agent_mesh/client/webui/frontend/static/ui-version.json +6 -0
- solace_agent_mesh/common/a2a/events.py +2 -1
- solace_agent_mesh/common/a2a/protocol.py +5 -0
- solace_agent_mesh/common/a2a/types.py +2 -1
- solace_agent_mesh/common/a2a_spec/schemas/artifact_creation_progress.json +23 -6
- solace_agent_mesh/common/a2a_spec/schemas/feedback_event.json +51 -0
- solace_agent_mesh/common/agent_registry.py +38 -11
- solace_agent_mesh/common/data_parts.py +144 -4
- solace_agent_mesh/common/error_handlers.py +83 -0
- solace_agent_mesh/common/exceptions.py +24 -0
- solace_agent_mesh/common/oauth/__init__.py +17 -0
- solace_agent_mesh/common/oauth/oauth_client.py +408 -0
- solace_agent_mesh/common/oauth/utils.py +50 -0
- solace_agent_mesh/common/rag_dto.py +156 -0
- solace_agent_mesh/common/sac/sam_component_base.py +97 -19
- solace_agent_mesh/common/sam_events/event_service.py +2 -2
- solace_agent_mesh/common/services/employee_service.py +1 -1
- solace_agent_mesh/common/utils/embeds/constants.py +1 -0
- solace_agent_mesh/common/utils/embeds/converter.py +1 -8
- solace_agent_mesh/common/utils/embeds/modifiers.py +4 -28
- solace_agent_mesh/common/utils/embeds/resolver.py +152 -31
- solace_agent_mesh/common/utils/embeds/types.py +9 -0
- solace_agent_mesh/common/utils/log_formatters.py +20 -0
- solace_agent_mesh/common/utils/mime_helpers.py +12 -5
- solace_agent_mesh/common/utils/pydantic_utils.py +90 -3
- solace_agent_mesh/common/utils/rbac_utils.py +69 -0
- solace_agent_mesh/common/utils/templates/__init__.py +8 -0
- solace_agent_mesh/common/utils/templates/liquid_renderer.py +210 -0
- solace_agent_mesh/common/utils/templates/template_resolver.py +161 -0
- solace_agent_mesh/config_portal/backend/common.py +12 -0
- solace_agent_mesh/config_portal/frontend/static/client/assets/_index-CljP4_mv.js +103 -0
- solace_agent_mesh/config_portal/frontend/static/client/assets/{components-Rk0n-9cK.js → components-CaC6hG8d.js} +22 -22
- solace_agent_mesh/config_portal/frontend/static/client/assets/{entry.client-mvZjNKiz.js → entry.client-H_TM0YBt.js} +3 -3
- solace_agent_mesh/config_portal/frontend/static/client/assets/{index-DzNKzXrc.js → index-CnFykb2v.js} +16 -16
- solace_agent_mesh/config_portal/frontend/static/client/assets/manifest-f8439d40.js +1 -0
- solace_agent_mesh/config_portal/frontend/static/client/assets/root-BIMqslJB.css +1 -0
- solace_agent_mesh/config_portal/frontend/static/client/assets/root-mJmTIdIk.js +10 -0
- solace_agent_mesh/config_portal/frontend/static/client/index.html +3 -3
- solace_agent_mesh/core_a2a/service.py +3 -2
- solace_agent_mesh/gateway/adapter/__init__.py +1 -0
- solace_agent_mesh/gateway/adapter/base.py +170 -0
- solace_agent_mesh/gateway/adapter/types.py +230 -0
- solace_agent_mesh/gateway/base/app.py +39 -2
- solace_agent_mesh/gateway/base/auth_interface.py +103 -0
- solace_agent_mesh/gateway/base/component.py +1027 -151
- solace_agent_mesh/gateway/generic/__init__.py +1 -0
- solace_agent_mesh/gateway/generic/app.py +50 -0
- solace_agent_mesh/gateway/generic/component.py +894 -0
- solace_agent_mesh/gateway/http_sse/alembic/env.py +0 -7
- solace_agent_mesh/gateway/http_sse/alembic/versions/20251023_add_project_users_table.py +72 -0
- solace_agent_mesh/gateway/http_sse/alembic/versions/20251023_add_soft_delete_and_search.py +109 -0
- solace_agent_mesh/gateway/http_sse/alembic/versions/20251024_add_default_agent_to_projects.py +26 -0
- solace_agent_mesh/gateway/http_sse/alembic/versions/20251024_add_projects_table.py +135 -0
- solace_agent_mesh/gateway/http_sse/alembic/versions/20251108_create_prompt_tables_with_sharing.py +154 -0
- solace_agent_mesh/gateway/http_sse/alembic/versions/20251115_add_parent_task_id.py +32 -0
- solace_agent_mesh/gateway/http_sse/alembic/versions/20251126_add_background_task_fields.py +47 -0
- solace_agent_mesh/gateway/http_sse/alembic/versions/20251202_add_versioned_fields_to_prompts.py +52 -0
- solace_agent_mesh/gateway/http_sse/alembic.ini +0 -36
- solace_agent_mesh/gateway/http_sse/app.py +40 -11
- solace_agent_mesh/gateway/http_sse/component.py +285 -160
- solace_agent_mesh/gateway/http_sse/dependencies.py +149 -114
- solace_agent_mesh/gateway/http_sse/main.py +68 -450
- solace_agent_mesh/gateway/http_sse/repository/__init__.py +19 -1
- solace_agent_mesh/gateway/http_sse/repository/chat_task_repository.py +2 -2
- solace_agent_mesh/gateway/http_sse/repository/entities/project.py +81 -0
- solace_agent_mesh/gateway/http_sse/repository/entities/project_user.py +47 -0
- solace_agent_mesh/gateway/http_sse/repository/entities/session.py +26 -3
- solace_agent_mesh/gateway/http_sse/repository/entities/task.py +7 -0
- solace_agent_mesh/gateway/http_sse/repository/feedback_repository.py +47 -0
- solace_agent_mesh/gateway/http_sse/repository/interfaces.py +114 -6
- solace_agent_mesh/gateway/http_sse/repository/models/__init__.py +13 -0
- solace_agent_mesh/gateway/http_sse/repository/models/project_model.py +51 -0
- solace_agent_mesh/gateway/http_sse/repository/models/project_user_model.py +75 -0
- solace_agent_mesh/gateway/http_sse/repository/models/prompt_model.py +159 -0
- solace_agent_mesh/gateway/http_sse/repository/models/session_model.py +8 -2
- solace_agent_mesh/gateway/http_sse/repository/models/task_model.py +8 -1
- solace_agent_mesh/gateway/http_sse/repository/project_repository.py +172 -0
- solace_agent_mesh/gateway/http_sse/repository/project_user_repository.py +186 -0
- solace_agent_mesh/gateway/http_sse/repository/session_repository.py +177 -11
- solace_agent_mesh/gateway/http_sse/repository/task_repository.py +86 -2
- solace_agent_mesh/gateway/http_sse/routers/agent_cards.py +38 -7
- solace_agent_mesh/gateway/http_sse/routers/artifacts.py +256 -58
- solace_agent_mesh/gateway/http_sse/routers/auth.py +168 -134
- solace_agent_mesh/gateway/http_sse/routers/config.py +302 -8
- solace_agent_mesh/gateway/http_sse/routers/dto/project_dto.py +69 -0
- solace_agent_mesh/gateway/http_sse/routers/dto/prompt_dto.py +255 -0
- solace_agent_mesh/gateway/http_sse/routers/dto/requests/project_requests.py +48 -0
- solace_agent_mesh/gateway/http_sse/routers/dto/requests/session_requests.py +14 -1
- solace_agent_mesh/gateway/http_sse/routers/dto/responses/base_responses.py +1 -1
- solace_agent_mesh/gateway/http_sse/routers/dto/responses/project_responses.py +31 -0
- solace_agent_mesh/gateway/http_sse/routers/dto/responses/session_responses.py +5 -2
- solace_agent_mesh/gateway/http_sse/routers/dto/responses/version_responses.py +31 -0
- solace_agent_mesh/gateway/http_sse/routers/feedback.py +133 -2
- solace_agent_mesh/gateway/http_sse/routers/people.py +2 -2
- solace_agent_mesh/gateway/http_sse/routers/projects.py +768 -0
- solace_agent_mesh/gateway/http_sse/routers/prompts.py +1416 -0
- solace_agent_mesh/gateway/http_sse/routers/sessions.py +167 -7
- solace_agent_mesh/gateway/http_sse/routers/speech.py +355 -0
- solace_agent_mesh/gateway/http_sse/routers/sse.py +131 -8
- solace_agent_mesh/gateway/http_sse/routers/tasks.py +670 -18
- solace_agent_mesh/gateway/http_sse/routers/users.py +1 -1
- solace_agent_mesh/gateway/http_sse/routers/version.py +343 -0
- solace_agent_mesh/gateway/http_sse/routers/visualization.py +92 -9
- solace_agent_mesh/gateway/http_sse/services/audio_service.py +1227 -0
- solace_agent_mesh/gateway/http_sse/services/background_task_monitor.py +186 -0
- solace_agent_mesh/gateway/http_sse/services/data_retention_service.py +1 -1
- solace_agent_mesh/gateway/http_sse/services/feedback_service.py +1 -1
- solace_agent_mesh/gateway/http_sse/services/project_service.py +930 -0
- solace_agent_mesh/gateway/http_sse/services/prompt_builder_assistant.py +303 -0
- solace_agent_mesh/gateway/http_sse/services/session_service.py +361 -12
- solace_agent_mesh/gateway/http_sse/services/task_logger_service.py +354 -4
- solace_agent_mesh/gateway/http_sse/session_manager.py +15 -15
- solace_agent_mesh/gateway/http_sse/sse_manager.py +286 -166
- solace_agent_mesh/gateway/http_sse/utils/artifact_copy_utils.py +370 -0
- solace_agent_mesh/gateway/http_sse/utils/stim_utils.py +41 -1
- solace_agent_mesh/services/__init__.py +0 -0
- solace_agent_mesh/services/platform/__init__.py +29 -0
- solace_agent_mesh/services/platform/alembic/env.py +85 -0
- solace_agent_mesh/services/platform/alembic/script.py.mako +28 -0
- solace_agent_mesh/services/platform/alembic.ini +109 -0
- solace_agent_mesh/services/platform/api/__init__.py +3 -0
- solace_agent_mesh/services/platform/api/dependencies.py +154 -0
- solace_agent_mesh/services/platform/api/main.py +314 -0
- solace_agent_mesh/services/platform/api/middleware.py +51 -0
- solace_agent_mesh/services/platform/api/routers/__init__.py +33 -0
- solace_agent_mesh/services/platform/api/routers/health_router.py +31 -0
- solace_agent_mesh/services/platform/app.py +215 -0
- solace_agent_mesh/services/platform/component.py +777 -0
- solace_agent_mesh/shared/__init__.py +14 -0
- solace_agent_mesh/shared/api/__init__.py +42 -0
- solace_agent_mesh/shared/auth/__init__.py +26 -0
- solace_agent_mesh/shared/auth/dependencies.py +204 -0
- solace_agent_mesh/shared/auth/middleware.py +347 -0
- solace_agent_mesh/shared/database/__init__.py +20 -0
- solace_agent_mesh/{gateway/http_sse/shared → shared/database}/base_repository.py +1 -1
- solace_agent_mesh/{gateway/http_sse/shared → shared/database}/database_exceptions.py +1 -1
- solace_agent_mesh/{gateway/http_sse/shared → shared/database}/database_helpers.py +1 -1
- solace_agent_mesh/shared/exceptions/__init__.py +36 -0
- solace_agent_mesh/{gateway/http_sse/shared → shared/exceptions}/exception_handlers.py +19 -5
- solace_agent_mesh/shared/utils/__init__.py +21 -0
- solace_agent_mesh/templates/logging_config_template.yaml +48 -0
- solace_agent_mesh/templates/main_orchestrator.yaml +12 -1
- solace_agent_mesh/templates/platform.yaml +49 -0
- solace_agent_mesh/templates/plugin_readme_template.md +3 -25
- solace_agent_mesh/templates/plugin_tool_config_template.yaml +109 -0
- solace_agent_mesh/templates/proxy_template.yaml +62 -0
- solace_agent_mesh/templates/webui.yaml +148 -6
- solace_agent_mesh/tools/web_search/__init__.py +18 -0
- solace_agent_mesh/tools/web_search/base.py +84 -0
- solace_agent_mesh/tools/web_search/google_search.py +247 -0
- solace_agent_mesh/tools/web_search/models.py +99 -0
- {solace_agent_mesh-1.6.1.dist-info → solace_agent_mesh-1.13.2.dist-info}/METADATA +31 -12
- solace_agent_mesh-1.13.2.dist-info/RECORD +591 -0
- {solace_agent_mesh-1.6.1.dist-info → solace_agent_mesh-1.13.2.dist-info}/WHEEL +1 -1
- solace_agent_mesh/agent/adk/adk_llm.txt +0 -232
- solace_agent_mesh/agent/adk/adk_llm_detail.txt +0 -566
- solace_agent_mesh/agent/adk/artifacts/artifacts_llm.txt +0 -171
- solace_agent_mesh/agent/adk/models/models_llm.txt +0 -142
- solace_agent_mesh/agent/agent_llm.txt +0 -378
- solace_agent_mesh/agent/agent_llm_detail.txt +0 -1702
- solace_agent_mesh/agent/protocol/protocol_llm.txt +0 -81
- solace_agent_mesh/agent/protocol/protocol_llm_detail.txt +0 -92
- solace_agent_mesh/agent/sac/sac_llm.txt +0 -189
- solace_agent_mesh/agent/sac/sac_llm_detail.txt +0 -200
- solace_agent_mesh/agent/testing/testing_llm.txt +0 -57
- solace_agent_mesh/agent/testing/testing_llm_detail.txt +0 -68
- solace_agent_mesh/agent/tools/tools_llm.txt +0 -263
- solace_agent_mesh/agent/tools/tools_llm_detail.txt +0 -274
- solace_agent_mesh/agent/utils/utils_llm.txt +0 -138
- solace_agent_mesh/agent/utils/utils_llm_detail.txt +0 -149
- solace_agent_mesh/assets/docs/assets/js/15ba94aa.932dd2db.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/17896441.a5e82f9b.js +0 -2
- solace_agent_mesh/assets/docs/assets/js/240a0364.7eac6021.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/2e32b5e0.33f5d75b.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/3a6c6137.f5940cfa.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/3ac1795d.76654dd9.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/3ff0015d.2be20244.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/547e15cc.2cbb060a.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/55b7b518.f2b1d1ba.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/5c2bd65f.eda4bcb2.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/631738c7.a8b1ef8b.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/6a520c9d.ba015d81.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/6ad8f0bd.f4b15f3b.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/6d84eae0.4a5fbf39.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/71da7b71.38583438.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/8024126c.56e59919.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/81a99df0.07034dd9.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/82fbfb93.139a1a1f.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/924ffdeb.8095e148.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/94e8668d.b5ddb7a1.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/9bb13469.dd1c9b54.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/a94703ab.0438dbc2.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/ab9708a8.3e6dd091.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/c93cbaa0.eaff365e.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/da0b5bad.d08a9466.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/dd817ffc.0aa9630a.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/dd81e2b8.d590bc9e.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/de915948.27d6b065.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/e3d9abda.6b9493d0.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/e6f9706b.e74a984d.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/e92d0134.cf6d6522.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/f284c35a.42f59cdd.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/ff4d71f2.15b02f97.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/main.b12eac43.js +0 -2
- solace_agent_mesh/assets/docs/assets/js/runtime~main.e268214e.js +0 -1
- solace_agent_mesh/assets/docs/lunr-index-1761248203150.json +0 -1
- solace_agent_mesh/assets/docs/search-doc-1761248203150.json +0 -1
- solace_agent_mesh/cli/commands/add_cmd/add_cmd_llm.txt +0 -250
- solace_agent_mesh/cli/commands/init_cmd/init_cmd_llm.txt +0 -365
- solace_agent_mesh/cli/commands/plugin_cmd/plugin_cmd_llm.txt +0 -305
- solace_agent_mesh/client/webui/frontend/static/assets/client-CaY59VuC.js +0 -25
- solace_agent_mesh/client/webui/frontend/static/assets/main-B32noGmR.js +0 -342
- solace_agent_mesh/client/webui/frontend/static/assets/main-DHJKSW1S.css +0 -1
- solace_agent_mesh/client/webui/frontend/static/assets/vendor-BEmvJSYz.js +0 -405
- solace_agent_mesh/common/a2a/a2a_llm.txt +0 -182
- solace_agent_mesh/common/a2a/a2a_llm_detail.txt +0 -193
- solace_agent_mesh/common/a2a_spec/a2a_spec_llm.txt +0 -407
- solace_agent_mesh/common/a2a_spec/a2a_spec_llm_detail.txt +0 -736
- solace_agent_mesh/common/a2a_spec/schemas/schemas_llm.txt +0 -313
- solace_agent_mesh/common/common_llm.txt +0 -251
- solace_agent_mesh/common/common_llm_detail.txt +0 -2562
- solace_agent_mesh/common/middleware/middleware_llm.txt +0 -174
- solace_agent_mesh/common/middleware/middleware_llm_detail.txt +0 -185
- solace_agent_mesh/common/sac/sac_llm.txt +0 -71
- solace_agent_mesh/common/sac/sac_llm_detail.txt +0 -82
- solace_agent_mesh/common/sam_events/sam_events_llm.txt +0 -104
- solace_agent_mesh/common/sam_events/sam_events_llm_detail.txt +0 -115
- solace_agent_mesh/common/services/providers/providers_llm.txt +0 -80
- solace_agent_mesh/common/services/services_llm.txt +0 -363
- solace_agent_mesh/common/services/services_llm_detail.txt +0 -459
- solace_agent_mesh/common/utils/embeds/embeds_llm.txt +0 -220
- solace_agent_mesh/common/utils/utils_llm.txt +0 -336
- solace_agent_mesh/common/utils/utils_llm_detail.txt +0 -572
- solace_agent_mesh/config_portal/frontend/static/client/assets/_index-ByU1X1HD.js +0 -98
- solace_agent_mesh/config_portal/frontend/static/client/assets/manifest-61038fc6.js +0 -1
- solace_agent_mesh/config_portal/frontend/static/client/assets/root-BWvk5-gF.js +0 -10
- solace_agent_mesh/config_portal/frontend/static/client/assets/root-DxRwaWiE.css +0 -1
- solace_agent_mesh/core_a2a/core_a2a_llm.txt +0 -90
- solace_agent_mesh/core_a2a/core_a2a_llm_detail.txt +0 -101
- solace_agent_mesh/gateway/base/base_llm.txt +0 -224
- solace_agent_mesh/gateway/base/base_llm_detail.txt +0 -235
- solace_agent_mesh/gateway/gateway_llm.txt +0 -373
- solace_agent_mesh/gateway/gateway_llm_detail.txt +0 -3885
- solace_agent_mesh/gateway/http_sse/alembic/alembic_llm.txt +0 -295
- solace_agent_mesh/gateway/http_sse/alembic/versions/versions_llm.txt +0 -155
- solace_agent_mesh/gateway/http_sse/components/components_llm.txt +0 -105
- solace_agent_mesh/gateway/http_sse/http_sse_llm.txt +0 -299
- solace_agent_mesh/gateway/http_sse/http_sse_llm_detail.txt +0 -3278
- solace_agent_mesh/gateway/http_sse/repository/entities/entities_llm.txt +0 -263
- solace_agent_mesh/gateway/http_sse/repository/models/models_llm.txt +0 -266
- solace_agent_mesh/gateway/http_sse/repository/repository_llm.txt +0 -340
- solace_agent_mesh/gateway/http_sse/routers/dto/dto_llm.txt +0 -346
- solace_agent_mesh/gateway/http_sse/routers/dto/requests/requests_llm.txt +0 -83
- solace_agent_mesh/gateway/http_sse/routers/dto/responses/responses_llm.txt +0 -107
- solace_agent_mesh/gateway/http_sse/routers/routers_llm.txt +0 -314
- solace_agent_mesh/gateway/http_sse/services/services_llm.txt +0 -297
- solace_agent_mesh/gateway/http_sse/shared/__init__.py +0 -146
- solace_agent_mesh/gateway/http_sse/shared/shared_llm.txt +0 -285
- solace_agent_mesh/gateway/http_sse/utils/utils_llm.txt +0 -47
- solace_agent_mesh/llm.txt +0 -228
- solace_agent_mesh/llm_detail.txt +0 -2835
- solace_agent_mesh/solace_agent_mesh_llm.txt +0 -362
- solace_agent_mesh/solace_agent_mesh_llm_detail.txt +0 -8599
- solace_agent_mesh/templates/logging_config_template.ini +0 -45
- solace_agent_mesh/templates/templates_llm.txt +0 -147
- solace_agent_mesh-1.6.1.dist-info/RECORD +0 -525
- /solace_agent_mesh/assets/docs/assets/js/{main.b12eac43.js.LICENSE.txt → main.d634009f.js.LICENSE.txt} +0 -0
- /solace_agent_mesh/{gateway/http_sse/shared → shared/api}/auth_utils.py +0 -0
- /solace_agent_mesh/{gateway/http_sse/shared → shared/api}/pagination.py +0 -0
- /solace_agent_mesh/{gateway/http_sse/shared → shared/api}/response_utils.py +0 -0
- /solace_agent_mesh/{gateway/http_sse/shared → shared/exceptions}/error_dto.py +0 -0
- /solace_agent_mesh/{gateway/http_sse/shared → shared/exceptions}/exceptions.py +0 -0
- /solace_agent_mesh/{gateway/http_sse/shared → shared/utils}/enums.py +0 -0
- /solace_agent_mesh/{gateway/http_sse/shared → shared/utils}/timestamp_utils.py +0 -0
- /solace_agent_mesh/{gateway/http_sse/shared → shared/utils}/types.py +0 -0
- /solace_agent_mesh/{gateway/http_sse/shared → shared/utils}/utils.py +0 -0
- {solace_agent_mesh-1.6.1.dist-info → solace_agent_mesh-1.13.2.dist-info}/entry_points.txt +0 -0
- {solace_agent_mesh-1.6.1.dist-info → solace_agent_mesh-1.13.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,58 +2,58 @@
|
|
|
2
2
|
<html lang="en" dir="ltr" class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-documentation/developing/evaluations" data-has-hydrated="false">
|
|
3
3
|
<head>
|
|
4
4
|
<meta charset="UTF-8">
|
|
5
|
-
<meta name="generator" content="Docusaurus v3.
|
|
6
|
-
<title data-rh="true">Evaluating Agents | Solace Agent Mesh</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://solacelabs.github.io/solace-agent-mesh/img/logo.png"><meta data-rh="true" name="twitter:image" content="https://solacelabs.github.io/solace-agent-mesh/img/logo.png"><meta data-rh="true" property="og:url" content="https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/evaluations"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Evaluating Agents | Solace Agent Mesh"><meta data-rh="true" name="description" content="The framework includes an evaluation system that helps you test your agents' behavior in a structured way. You can define test suites, run them against your agents, and generate detailed reports to analyze the results. When running evaluations locally, you can also benchmark different language models to see how they affect your agents' responses."><meta data-rh="true" property="og:description" content="The framework includes an evaluation system that helps you test your agents' behavior in a structured way. You can define test suites, run them against your agents, and generate detailed reports to analyze the results. When running evaluations locally, you can also benchmark different language models to see how they affect your agents' responses."><link data-rh="true" rel="icon" href="/solace-agent-mesh/img/logo.png"><link data-rh="true" rel="canonical" href="https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/evaluations"><link data-rh="true" rel="alternate" href="https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/evaluations" hreflang="en"><link data-rh="true" rel="alternate" href="https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/evaluations" hreflang="x-default"><script data-rh="true" type="application/ld+json">{"@context":"https://schema.org","@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","position":1,"name":"Developing with Agent Mesh","item":"https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/"},{"@type":"ListItem","position":2,"name":"Evaluating Agents","item":"https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/evaluations"}]}</script><link rel="stylesheet" href="/solace-agent-mesh/assets/css/styles.
|
|
7
|
-
<script src="/solace-agent-mesh/assets/js/runtime~main.
|
|
8
|
-
<script src="/solace-agent-mesh/assets/js/main.
|
|
5
|
+
<meta name="generator" content="Docusaurus v3.9.2">
|
|
6
|
+
<title data-rh="true">Evaluating Agents | Solace Agent Mesh</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://solacelabs.github.io/solace-agent-mesh/img/logo.png"><meta data-rh="true" name="twitter:image" content="https://solacelabs.github.io/solace-agent-mesh/img/logo.png"><meta data-rh="true" property="og:url" content="https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/evaluations"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Evaluating Agents | Solace Agent Mesh"><meta data-rh="true" name="description" content="The framework includes an evaluation system that helps you test your agents' behavior in a structured way. You can define test suites, run them against your agents, and generate detailed reports to analyze the results. When running evaluations locally, you can also benchmark different language models to see how they affect your agents' responses."><meta data-rh="true" property="og:description" content="The framework includes an evaluation system that helps you test your agents' behavior in a structured way. You can define test suites, run them against your agents, and generate detailed reports to analyze the results. When running evaluations locally, you can also benchmark different language models to see how they affect your agents' responses."><link data-rh="true" rel="icon" href="/solace-agent-mesh/img/logo.png"><link data-rh="true" rel="canonical" href="https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/evaluations"><link data-rh="true" rel="alternate" href="https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/evaluations" hreflang="en"><link data-rh="true" rel="alternate" href="https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/evaluations" hreflang="x-default"><script data-rh="true" type="application/ld+json">{"@context":"https://schema.org","@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","position":1,"name":"Developing with Agent Mesh","item":"https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/"},{"@type":"ListItem","position":2,"name":"Evaluating Agents","item":"https://solacelabs.github.io/solace-agent-mesh/docs/documentation/developing/evaluations"}]}</script><link rel="stylesheet" href="/solace-agent-mesh/assets/css/styles.8162edfb.css">
|
|
7
|
+
<script src="/solace-agent-mesh/assets/js/runtime~main.27bb82a7.js" defer="defer"></script>
|
|
8
|
+
<script src="/solace-agent-mesh/assets/js/main.d634009f.js" defer="defer"></script>
|
|
9
9
|
</head>
|
|
10
10
|
<body class="navigation-with-keyboard">
|
|
11
|
-
<svg
|
|
11
|
+
<svg style="display: none;"><defs>
|
|
12
12
|
<symbol id="theme-svg-external-link" viewBox="0 0 24 24"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"/></symbol>
|
|
13
13
|
</defs></svg>
|
|
14
|
-
<script>!function(){var t=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme")}catch(t){}}();document.documentElement.setAttribute("data-theme",t||(window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light")),document.documentElement.setAttribute("data-theme-choice",t||"system")}(),function(){try{const c=new URLSearchParams(window.location.search).entries();for(var[t,e]of c)if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id="__docusaurus"><link rel="preload" as="image" href="/solace-agent-mesh/img/logo.png"><link rel="preload" as="image" href="/solace-agent-mesh/img/solace-logo.png"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="theme-layout-navbar navbar navbar--fixed-top"><div class="navbar__inner"><div class="theme-layout-navbar-left navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/solace-agent-mesh/docs/documentation/getting-started"><div class="navbar__logo"><img src="/solace-agent-mesh/img/logo.png" alt="Solace Agent Mesh Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"><img src="/solace-agent-mesh/img/logo.png" alt="Solace Agent Mesh Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"></div><b class="navbar__title text--truncate">Solace Agent Mesh</b></a><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/solace-agent-mesh/docs/documentation/getting-started/">Documentation</a></div><div class="theme-layout-navbar-right navbar__items navbar__items--right"><a href="https://github.com/SolaceLabs/solace-agent-mesh/" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width="13.5" height="13.5" aria-hidden="true" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type="button" disabled="" title="system mode" aria-label="Switch between dark and light mode (currently system mode)"><svg viewBox="0 0 24 24" width="24" height="24" aria-hidden="true" class="toggleIcon_g3eP lightToggleIcon_pyhR"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" aria-hidden="true" class="toggleIcon_g3eP darkToggleIcon_wfgR"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" aria-hidden="true" class="toggleIcon_g3eP systemToggleIcon_QzmC"><path fill="currentColor" d="m12 21c4.971 0 9-4.029 9-9s-4.029-9-9-9-9 4.029-9 9 4.029 9 9 9zm4.95-13.95c1.313 1.313 2.05 3.093 2.05 4.95s-0.738 3.637-2.05 4.95c-1.313 1.313-3.093 2.05-4.95 2.05v-14c1.857 0 3.637 0.737 4.95 2.05z"></path></svg></button></div><div class="navbarSearchContainer_Bca1"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input id="search_input_react" type="search" placeholder="Loading..." aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="theme-layout-main main-wrapper mainWrapper_z2l0"><div class="docsWrapper_hBAB"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docRoot_UBD9"><aside class="theme-doc-sidebar-container docSidebarContainer_YfHR"><div class="sidebarViewport_aRkj"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/solace-agent-mesh/docs/documentation/getting-started/">Getting Started</a><button aria-label="Collapse sidebar category 'Getting Started'" aria-expanded="true" type="button" class="clean-btn menu__caret"></button></div><ul class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/getting-started/introduction">What is Agent Mesh?</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/getting-started/try-agent-mesh">Try Agent Mesh</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/getting-started/architecture">Architecture Overview</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/solace-agent-mesh/docs/documentation/components/">Components</a><button aria-label="Expand sidebar category 'Components'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/solace-agent-mesh/docs/documentation/installing-and-configuring/">Installing and Configuring Agent Mesh</a><button aria-label="Expand sidebar category 'Installing and Configuring Agent Mesh'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--active" href="/solace-agent-mesh/docs/documentation/developing/">Developing with Agent Mesh</a><button aria-label="Collapse sidebar category 'Developing with Agent Mesh'" aria-expanded="true" type="button" class="clean-btn menu__caret"></button></div><ul class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/structure">Project Structure</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/create-agents">Creating Agents</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/create-gateways">Create Gateways</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/creating-python-tools">Creating Python Tools</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/creating-service-providers">Creating Service Providers</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/evaluations">Evaluating Agents</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/tutorials/custom-agent">Tutorials</a></div></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/solace-agent-mesh/docs/documentation/deploying/">Deploying Agent Mesh</a><button aria-label="Expand sidebar category 'Deploying Agent Mesh'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" href="/solace-agent-mesh/docs/documentation/migrations/a2a-upgrade/a2a-gateway-upgrade-to-0.3.0">Migrations</a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/solace-agent-mesh/docs/documentation/enterprise/">Agent Mesh Enterprise</a><button aria-label="Expand sidebar category 'Agent Mesh Enterprise'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li></ul></nav></div></div></aside><main class="docMainContainer_TBSr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs"><li class="breadcrumbs__item"><a class="breadcrumbs__link" href="/solace-agent-mesh/docs/documentation/developing/"><span>Developing with Agent Mesh</span></a></li><li class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link">Evaluating Agents</span></li></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Evaluating Agents</h1></header>
|
|
14
|
+
<script>!function(){var t=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme")}catch(t){}}();document.documentElement.setAttribute("data-theme",t||(window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light")),document.documentElement.setAttribute("data-theme-choice",t||"system")}(),function(){try{const c=new URLSearchParams(window.location.search).entries();for(var[t,e]of c)if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id="__docusaurus"><link rel="preload" as="image" href="/solace-agent-mesh/img/logo.png"><link rel="preload" as="image" href="/solace-agent-mesh/img/solace-logo.png"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="theme-layout-navbar navbar navbar--fixed-top"><div class="navbar__inner"><div class="theme-layout-navbar-left navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/solace-agent-mesh/docs/documentation/getting-started"><div class="navbar__logo"><img src="/solace-agent-mesh/img/logo.png" alt="Solace Agent Mesh Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"><img src="/solace-agent-mesh/img/logo.png" alt="Solace Agent Mesh Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"></div><b class="navbar__title text--truncate">Solace Agent Mesh</b></a><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/solace-agent-mesh/docs/documentation/getting-started/">Documentation</a></div><div class="theme-layout-navbar-right navbar__items navbar__items--right"><a href="https://github.com/SolaceLabs/solace-agent-mesh/" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type="button" disabled="" title="system mode" aria-label="Switch between dark and light mode (currently system mode)"><svg viewBox="0 0 24 24" width="24" height="24" aria-hidden="true" class="toggleIcon_g3eP lightToggleIcon_pyhR"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" aria-hidden="true" class="toggleIcon_g3eP darkToggleIcon_wfgR"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" aria-hidden="true" class="toggleIcon_g3eP systemToggleIcon_QzmC"><path fill="currentColor" d="m12 21c4.971 0 9-4.029 9-9s-4.029-9-9-9-9 4.029-9 9 4.029 9 9 9zm4.95-13.95c1.313 1.313 2.05 3.093 2.05 4.95s-0.738 3.637-2.05 4.95c-1.313 1.313-3.093 2.05-4.95 2.05v-14c1.857 0 3.637 0.737 4.95 2.05z"></path></svg></button></div><div class="navbarSearchContainer_Bca1"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input id="search_input_react" type="search" placeholder="Loading..." aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="theme-layout-main main-wrapper mainWrapper_z2l0"><div class="docsWrapper_hBAB"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docRoot_UBD9"><aside class="theme-doc-sidebar-container docSidebarContainer_YfHR"><div class="sidebarViewport_aRkj"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="categoryLink_byQd menu__link menu__link--sublist" href="/solace-agent-mesh/docs/documentation/getting-started/"><span title="Getting Started" class="categoryLinkLabel_W154">Getting Started</span></a><button aria-label="Collapse sidebar category 'Getting Started'" aria-expanded="true" type="button" class="clean-btn menu__caret"></button></div><ul class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/getting-started/introduction"><span title="What is Agent Mesh?" class="linkLabel_WmDU">What is Agent Mesh?</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/getting-started/try-agent-mesh"><span title="Try Agent Mesh" class="linkLabel_WmDU">Try Agent Mesh</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/getting-started/architecture"><span title="Architecture Overview" class="linkLabel_WmDU">Architecture Overview</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/getting-started/vibe_coding"><span title="Vibe Coding" class="linkLabel_WmDU">Vibe Coding</span></a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="categoryLink_byQd menu__link menu__link--sublist" href="/solace-agent-mesh/docs/documentation/components/"><span title="Components" class="categoryLinkLabel_W154">Components</span></a><button aria-label="Expand sidebar category 'Components'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="categoryLink_byQd menu__link menu__link--sublist" href="/solace-agent-mesh/docs/documentation/installing-and-configuring/"><span title="Installing and Configuring Agent Mesh" class="categoryLinkLabel_W154">Installing and Configuring Agent Mesh</span></a><button aria-label="Expand sidebar category 'Installing and Configuring Agent Mesh'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--active" href="/solace-agent-mesh/docs/documentation/developing/"><span title="Developing with Agent Mesh" class="categoryLinkLabel_W154">Developing with Agent Mesh</span></a><button aria-label="Collapse sidebar category 'Developing with Agent Mesh'" aria-expanded="true" type="button" class="clean-btn menu__caret"></button></div><ul class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/structure"><span title="Project Structure" class="linkLabel_WmDU">Project Structure</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/create-agents"><span title="Creating Agents" class="linkLabel_WmDU">Creating Agents</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/create-gateways"><span title="Creating Custom Gateways" class="linkLabel_WmDU">Creating Custom Gateways</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/creating-python-tools"><span title="Creating Python Tools" class="linkLabel_WmDU">Creating Python Tools</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/creating-service-providers"><span title="Creating Service Providers" class="linkLabel_WmDU">Creating Service Providers</span></a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/evaluations"><span title="Evaluating Agents" class="linkLabel_WmDU">Evaluating Agents</span></a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" tabindex="0" href="/solace-agent-mesh/docs/documentation/developing/tutorials/custom-agent"><span title="Tutorials" class="categoryLinkLabel_W154">Tutorials</span></a></div></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="categoryLink_byQd menu__link menu__link--sublist" href="/solace-agent-mesh/docs/documentation/deploying/"><span title="Deploying Agent Mesh" class="categoryLinkLabel_W154">Deploying Agent Mesh</span></a><button aria-label="Expand sidebar category 'Deploying Agent Mesh'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="categoryLink_byQd menu__link menu__link--sublist menu__link--sublist-caret" role="button" aria-expanded="false" href="/solace-agent-mesh/docs/documentation/migrations/platform-service-split"><span title="Migrations" class="categoryLinkLabel_W154">Migrations</span></a></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="categoryLink_byQd menu__link menu__link--sublist" href="/solace-agent-mesh/docs/documentation/enterprise/"><span title="Agent Mesh Enterprise" class="categoryLinkLabel_W154">Agent Mesh Enterprise</span></a><button aria-label="Expand sidebar category 'Agent Mesh Enterprise'" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li></ul></nav></div></div></aside><main class="docMainContainer_TBSr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs"><li class="breadcrumbs__item"><a class="breadcrumbs__link" href="/solace-agent-mesh/docs/documentation/developing/"><span>Developing with Agent Mesh</span></a></li><li class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link">Evaluating Agents</span></li></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Evaluating Agents</h1></header>
|
|
15
15
|
<p>The framework includes an evaluation system that helps you test your agents' behavior in a structured way. You can define test suites, run them against your agents, and generate detailed reports to analyze the results. When running evaluations locally, you can also benchmark different language models to see how they affect your agents' responses.</p>
|
|
16
16
|
<p>At a high level, the evaluation process involves three main components:</p>
|
|
17
17
|
<ul>
|
|
18
|
-
<li><strong>Test Case</strong>: A <code>JSON</code> file that defines a single, specific task for an agent to perform. It includes the initial prompt, any required files (artifacts), and the criteria for a successful outcome.</li>
|
|
19
|
-
<li><strong>Test Suite</strong>: A <code>JSON</code> file that groups one or more test cases into a single evaluation run. It also defines the environment for the evaluation, such as whether to run the agents locally or connect to a remote Agent Mesh.</li>
|
|
20
|
-
<li><strong>Evaluation Settings</strong>: A configuration block within the test suite that specifies how to score the agent's performance. You can choose from several methods, from simple metric-based comparisons to more advanced evaluations using a language model.</li>
|
|
18
|
+
<li class=""><strong>Test Case</strong>: A <code>JSON</code> file that defines a single, specific task for an agent to perform. It includes the initial prompt, any required files (artifacts), and the criteria for a successful outcome.</li>
|
|
19
|
+
<li class=""><strong>Test Suite</strong>: A <code>JSON</code> file that groups one or more test cases into a single evaluation run. It also defines the environment for the evaluation, such as whether to run the agents locally or connect to a remote Agent Mesh.</li>
|
|
20
|
+
<li class=""><strong>Evaluation Settings</strong>: A configuration block within the test suite that specifies how to score the agent's performance. You can choose from several methods, from simple metric-based comparisons to more advanced evaluations using a language model.</li>
|
|
21
21
|
</ul>
|
|
22
22
|
<p>This document guides you through creating test cases, assembling them into test suites, and running evaluations to test your agents.</p>
|
|
23
|
-
<h2 class="anchor
|
|
23
|
+
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="creating-a-test-case">Creating a Test Case<a href="#creating-a-test-case" class="hash-link" aria-label="Direct link to Creating a Test Case" title="Direct link to Creating a Test Case" translate="no"></a></h2>
|
|
24
24
|
<p>A test case is a <code>JSON</code> file that defines a specific task for an agent to perform. It serves as the fundamental building block of an evaluation. You create a test case to represent a single interaction you want to test, such as asking a question, providing a file for processing, or requesting a specific action from an agent.</p>
|
|
25
|
-
<h3 class="anchor
|
|
25
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="test-case-configuration">Test Case Configuration<a href="#test-case-configuration" class="hash-link" aria-label="Direct link to Test Case Configuration" title="Direct link to Test Case Configuration" translate="no"></a></h3>
|
|
26
26
|
<p>The following fields are available in the test case <code>JSON</code> file.</p>
|
|
27
27
|
<ul>
|
|
28
|
-
<li><code>test_case_id</code> (Required): A unique identifier for the test case.</li>
|
|
29
|
-
<li><code>query</code> (Required): The initial prompt to be sent to the agent.</li>
|
|
30
|
-
<li><code>target_agent</code> (Required): The name of the agent to which the query should be sent.</li>
|
|
31
|
-
<li><code>category</code> (Optional): The category of the test case. Defaults to <code>Other</code>.</li>
|
|
32
|
-
<li><code>description</code> (Optional): A description of the test case.</li>
|
|
33
|
-
<li><code>artifacts</code> (Optional): A list of artifacts to be sent with the initial query. Each artifact has a <code>type</code> and a <code>path</code>.</li>
|
|
34
|
-
<li><code>wait_time</code> (Optional): The maximum time in seconds to wait for a response from the agent. Defaults to <code>60</code>.</li>
|
|
35
|
-
<li><code>evaluation</code> (Optional): The evaluation criteria for the test case.<!-- -->
|
|
28
|
+
<li class=""><code>test_case_id</code> (Required): A unique identifier for the test case.</li>
|
|
29
|
+
<li class=""><code>query</code> (Required): The initial prompt to be sent to the agent.</li>
|
|
30
|
+
<li class=""><code>target_agent</code> (Required): The name of the agent to which the query should be sent.</li>
|
|
31
|
+
<li class=""><code>category</code> (Optional): The category of the test case. Defaults to <code>Other</code>.</li>
|
|
32
|
+
<li class=""><code>description</code> (Optional): A description of the test case.</li>
|
|
33
|
+
<li class=""><code>artifacts</code> (Optional): A list of artifacts to be sent with the initial query. Each artifact has a <code>type</code> and a <code>path</code>.</li>
|
|
34
|
+
<li class=""><code>wait_time</code> (Optional): The maximum time in seconds to wait for a response from the agent. Defaults to <code>60</code>.</li>
|
|
35
|
+
<li class=""><code>evaluation</code> (Optional): The evaluation criteria for the test case.<!-- -->
|
|
36
36
|
<ul>
|
|
37
|
-
<li><code>expected_tools</code> (Optional): A list of tools that the agent is expected to use. Defaults to an empty list.</li>
|
|
38
|
-
<li><code>expected_response</code> (Optional): The expected final response from the agent. Defaults to an empty string.</li>
|
|
39
|
-
<li><code>criterion</code> (Optional): The criterion to be used by the <code>llm_evaluator</code>. Defaults to an empty string.</li>
|
|
37
|
+
<li class=""><code>expected_tools</code> (Optional): A list of tools that the agent is expected to use. Defaults to an empty list.</li>
|
|
38
|
+
<li class=""><code>expected_response</code> (Optional): The expected final response from the agent. Defaults to an empty string.</li>
|
|
39
|
+
<li class=""><code>criterion</code> (Optional): The criterion to be used by the <code>llm_evaluator</code>. Defaults to an empty string.</li>
|
|
40
40
|
</ul>
|
|
41
41
|
</li>
|
|
42
42
|
</ul>
|
|
43
|
-
<h3 class="anchor
|
|
43
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="test-case-examples">Test Case Examples<a href="#test-case-examples" class="hash-link" aria-label="Direct link to Test Case Examples" title="Direct link to Test Case Examples" translate="no"></a></h3>
|
|
44
44
|
<p>Here is an example of a simple test case. It sends a greeting to an agent and checks for a standard response.</p>
|
|
45
45
|
<div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"test_case_id"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"hello_world"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"category"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"Content Generation"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"description"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"A simple test case to check the basic functionality of the system."</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"query"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"Hello, world!"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"target_agent"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"OrchestratorAgent"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"wait_time"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token number">30</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"evaluation"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"expected_tools"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"expected_response"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"Hello! How can I help you today?"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"criterion"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"Evaluate if the agent provides a standard greeting."</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><br></span></code></pre></div></div>
|
|
46
46
|
<p>This next example shows a more complex test case. It includes a <code>CSV</code> file as an artifact and asks the agent to filter the data in the file.</p>
|
|
47
47
|
<div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"test_case_id"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"filter_csv_employees_by_age_and_country"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"category"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"Tool Usage"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"description"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"A test case to filter employees from a CSV file based on age and country."</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"target_agent"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"OrchestratorAgent"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"query"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"From the attached CSV, please list the names of all people who are older or equal to 30 and live in the USA."</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"artifacts"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"type"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"file"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"path"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"artifacts/sample.csv"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"wait_time"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token number">120</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"evaluation"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"expected_tools"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token string" style="color:rgb(255, 121, 198)">"extract_content_from_artifact"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"expected_response"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"The person who is 30 or older and lives in the USA is John Doe."</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"criterion"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"Evaluate if the agent correctly filters the CSV data."</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><br></span></code></pre></div></div>
|
|
48
|
-
<h2 class="anchor
|
|
48
|
+
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="creating-a-test-suite">Creating a Test Suite<a href="#creating-a-test-suite" class="hash-link" aria-label="Direct link to Creating a Test Suite" title="Direct link to Creating a Test Suite" translate="no"></a></h2>
|
|
49
49
|
<p>The test suite is a <code>JSON</code> file that defines the parameters of an evaluation run. You use it to group test cases and configure the environment in which they run.</p>
|
|
50
50
|
<p>A common convention in the test suite configuration is to use keys ending with <code>_VAR</code>. These keys indicate that the corresponding value is the name of an environment variable from which the framework should read the actual value. This practice helps you keep sensitive information—like API keys and credentials—out of your configuration files. This convention applies to the <code>broker</code> object, the <code>env</code> object within <code>llm_models</code>, and the <code>env</code> object within the <code>llm_evaluator</code> in <code>evaluation_settings</code>.</p>
|
|
51
51
|
<p>You can run evaluations in two modes: local and remote. Both modes require a connection to a Solace event broker to function.</p>
|
|
52
|
-
<h3 class="anchor
|
|
52
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="local-evaluation">Local Evaluation<a href="#local-evaluation" class="hash-link" aria-label="Direct link to Local Evaluation" title="Direct link to Local Evaluation" translate="no"></a></h3>
|
|
53
53
|
<p>In a local evaluation, the evaluation framework brings up a local instance of Solace Agent Mesh (SAM) and runs the agents on your local machine. This mode is useful for development and testing because it allows you to iterate quickly on your agents and test cases. You can also use this mode to benchmark different language models against your agents to see how they perform.</p>
|
|
54
54
|
<p>To run a local evaluation, you need to install the <code>sam-rest-gateway</code> plugin. This plugin allows the evaluation framework to communicate with the local SAM instance. You can install it with the following command:</p>
|
|
55
55
|
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token plain">pip install "sam-rest-gateway @ git+https://github.com/SolaceLabs/solace-agent-mesh-core-plugins#subdirectory=sam-rest-gateway"</span><br></span></code></pre></div></div>
|
|
56
|
-
<h4 class="anchor
|
|
56
|
+
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id="local-test-suite-configuration">Local Test Suite Configuration<a href="#local-test-suite-configuration" class="hash-link" aria-label="Direct link to Local Test Suite Configuration" title="Direct link to Local Test Suite Configuration" translate="no"></a></h4>
|
|
57
57
|
<p>For a local evaluation, you must define the <code>agents</code>, <code>broker</code>, <code>llm_models</code>, and <code>test_cases</code> fields.</p>
|
|
58
58
|
<p>The <code>agents</code> field is a required list of paths to the agent configuration files. You must specify at least one agent.</p>
|
|
59
59
|
<div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token property">"agents"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"examples/agents/a2a_agents_example.yaml"</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><br></span></code></pre></div></div>
|
|
@@ -72,11 +72,11 @@
|
|
|
72
72
|
<div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token property">"workers"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token number">8</span><br></span></code></pre></div></div>
|
|
73
73
|
<p>The <code>evaluation_settings</code> field is an optional object that allows you to configure the evaluation. This object can contain <code>tool_match</code>, <code>response_match</code>, and <code>llm_evaluator</code> settings.</p>
|
|
74
74
|
<div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token property">"evaluation_settings"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"tool_match"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"response_match"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"llm_evaluator"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"env"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_PLANNING_MODEL_NAME"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"openai/gemini-2.5-pro"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_ENDPOINT_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"LLM_SERVICE_ENDPOINT"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_API_KEY_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"LLM_SERVICE_API_KEY"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><br></span></code></pre></div></div>
|
|
75
|
-
<h4 class="anchor
|
|
75
|
+
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id="example-local-test-suite">Example Local Test Suite<a href="#example-local-test-suite" class="hash-link" aria-label="Direct link to Example Local Test Suite" title="Direct link to Example Local Test Suite" translate="no"></a></h4>
|
|
76
76
|
<div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"agents"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"examples/agents/a2a_agents_example.yaml"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"examples/agents/multimodal_example.yaml"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"examples/agents/orchestrator_example.yaml"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"broker"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_URL_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_URL"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_USERNAME_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_USERNAME"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_PASSWORD_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_PASSWORD"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_VPN_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_VPN"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"llm_models"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"name"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"gpt-4-1"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"env"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_PLANNING_MODEL_NAME"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"openai/azure-gpt-4-1"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_ENDPOINT_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"LLM_SERVICE_ENDPOINT"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_API_KEY_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"LLM_SERVICE_API_KEY"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"results_dir_name"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"sam-local-eval-test"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"runs"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token number">3</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"workers"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token number">4</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"test_cases"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"tests/evaluation/test_cases/filter_csv_employees_by_age_and_country.test.json"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"tests/evaluation/test_cases/hello_world.test.json"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"evaluation_settings"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"tool_match"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"response_match"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"llm_evaluator"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"env"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_PLANNING_MODEL_NAME"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"openai/gemini-2.5-pro"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_ENDPOINT_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"LLM_SERVICE_ENDPOINT"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_API_KEY_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"LLM_SERVICE_API_KEY"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><br></span></code></pre></div></div>
|
|
77
|
-
<h3 class="anchor
|
|
77
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="remote-evaluation">Remote Evaluation<a href="#remote-evaluation" class="hash-link" aria-label="Direct link to Remote Evaluation" title="Direct link to Remote Evaluation" translate="no"></a></h3>
|
|
78
78
|
<p>In a remote evaluation, the evaluation framework sends requests to a remote Agent Mesh instance. This mode is useful for testing agents in a production-like environment where the agents are running on a separate server. The remote environment must have a REST gateway running to accept requests from the evaluation framework. You can also use an authentication token to communicate securely with the remote SAM instance.</p>
|
|
79
|
-
<h4 class="anchor
|
|
79
|
+
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id="remote-test-suite-configuration">Remote Test Suite Configuration<a href="#remote-test-suite-configuration" class="hash-link" aria-label="Direct link to Remote Test Suite Configuration" title="Direct link to Remote Test Suite Configuration" translate="no"></a></h4>
|
|
80
80
|
<p>For a remote evaluation, you must define the <code>broker</code>, <code>remote</code>, and <code>test_cases</code> fields.</p>
|
|
81
81
|
<p>The <code>broker</code> field is a required object with connection details for the Solace event broker.</p>
|
|
82
82
|
<div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token property">"broker"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_URL_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_URL"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_USERNAME_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_USERNAME"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_PASSWORD_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_PASSWORD"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_VPN_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_VPN"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><br></span></code></pre></div></div>
|
|
@@ -91,45 +91,45 @@
|
|
|
91
91
|
<div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token property">"runs"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token number">5</span><br></span></code></pre></div></div>
|
|
92
92
|
<p>The <code>evaluation_settings</code> field is an optional object that allows you to configure the evaluation.</p>
|
|
93
93
|
<div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token property">"evaluation_settings"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"tool_match"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"response_match"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><br></span></code></pre></div></div>
|
|
94
|
-
<h4 class="anchor
|
|
94
|
+
<h4 class="anchor anchorTargetStickyNavbar_Vzrq" id="example-remote-test-suite">Example Remote Test Suite<a href="#example-remote-test-suite" class="hash-link" aria-label="Direct link to Example Remote Test Suite" title="Direct link to Example Remote Test Suite" translate="no"></a></h4>
|
|
95
95
|
<div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"broker"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_URL_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_URL"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_USERNAME_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_USERNAME"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_PASSWORD_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_PASSWORD"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"SOLACE_BROKER_VPN_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"SOLACE_BROKER_VPN"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"remote"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"EVAL_REMOTE_URL_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"EVAL_REMOTE_URL"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"EVAL_AUTH_TOKEN_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"EVAL_AUTH_TOKEN"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"EVAL_NAMESPACE_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"EVAL_NAMESPACE"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"results_dir_name"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"sam-remote-eval-test"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"runs"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token number">1</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"test_cases"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">[</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"tests/evaluation/test_cases/filter_csv_employees_by_age_and_country.test.json"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"tests/evaluation/test_cases/hello_world.test.json"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">]</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"evaluation_settings"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"tool_match"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"response_match"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"llm_evaluator"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"enabled"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token boolean">true</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"env"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_PLANNING_MODEL_NAME"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"openai/gemini-2.5-pro"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_ENDPOINT_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"LLM_SERVICE_ENDPOINT"</span><span class="token punctuation" style="color:rgb(248, 248, 242)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token property">"LLM_SERVICE_API_KEY_VAR"</span><span class="token operator">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(255, 121, 198)">"LLM_SERVICE_API_KEY"</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"></span><span class="token punctuation" style="color:rgb(248, 248, 242)">}</span><br></span></code></pre></div></div>
|
|
96
|
-
<h2 class="anchor
|
|
96
|
+
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="evaluation-settings">Evaluation Settings<a href="#evaluation-settings" class="hash-link" aria-label="Direct link to Evaluation Settings" title="Direct link to Evaluation Settings" translate="no"></a></h2>
|
|
97
97
|
<p>The <code>evaluation_settings</code> block in the test suite <code>JSON</code> file allows you to configure how the evaluation is performed. Each enabled setting provides a score from 0 to 1, which contributes to the overall score for the test case.</p>
|
|
98
|
-
<h3 class="anchor
|
|
98
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="tool_match"><code>tool_match</code><a href="#tool_match" class="hash-link" aria-label="Direct link to tool_match" title="Direct link to tool_match" translate="no"></a></h3>
|
|
99
99
|
<p>The <code>tool_match</code> setting compares the tools the agent used with the <code>expected_tools</code> defined in the test case. This is a simple, direct comparison and does not use a language model for the evaluation. It is most effective when the agent's expected behavior is straightforward and there is a clear, correct sequence of tools to be used. In more complex scenarios where multiple paths could lead to a successful outcome, this method may not be the best way to evaluate the agent's performance.</p>
|
|
100
|
-
<h3 class="anchor
|
|
101
|
-
<p>The <code>response_match</code> setting compares the agent's final response with the <code>expected_response</code> from the test case. This comparison is based on the ROUGE metric, which evaluates the similarity between two responses by comparing their sequence of words. This method does not use a language model for the evaluation and does not work well with synonyms, so it is most effective when the expected answer is consistent. For more information about the ROUGE metric, see the <a href="https://pypi.org/project/rouge-metric/" target="_blank" rel="noopener noreferrer">official documentation</a>.</p>
|
|
102
|
-
<h3 class="anchor
|
|
100
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="response_match"><code>response_match</code><a href="#response_match" class="hash-link" aria-label="Direct link to response_match" title="Direct link to response_match" translate="no"></a></h3>
|
|
101
|
+
<p>The <code>response_match</code> setting compares the agent's final response with the <code>expected_response</code> from the test case. This comparison is based on the ROUGE metric, which evaluates the similarity between two responses by comparing their sequence of words. This method does not use a language model for the evaluation and does not work well with synonyms, so it is most effective when the expected answer is consistent. For more information about the ROUGE metric, see the <a href="https://pypi.org/project/rouge-metric/" target="_blank" rel="noopener noreferrer" class="">official documentation</a>.</p>
|
|
102
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="llm_evaluator"><code>llm_evaluator</code><a href="#llm_evaluator" class="hash-link" aria-label="Direct link to llm_evaluator" title="Direct link to llm_evaluator" translate="no"></a></h3>
|
|
103
103
|
<p>The <code>llm_evaluator</code> setting uses a language model to evaluate the entire lifecycle of a request within the agent mesh. This includes the initial prompt, all tool calls, delegation between agents, artifact inputs and outputs, and the final message output. The evaluation is based on a <code>criterion</code> you provide in the test case, which defines what a successful outcome looks like. This is the most comprehensive evaluation method because it considers the full context of the request's execution.</p>
|
|
104
|
-
<h2 class="anchor
|
|
104
|
+
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="running-evaluations">Running Evaluations<a href="#running-evaluations" class="hash-link" aria-label="Direct link to Running Evaluations" title="Direct link to Running Evaluations" translate="no"></a></h2>
|
|
105
105
|
<p>After you create your test cases and test suite, you can run the evaluation from the command line using the <code>sam eval</code> command.</p>
|
|
106
|
-
<h3 class="anchor
|
|
106
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="command">Command<a href="#command" class="hash-link" aria-label="Direct link to Command" title="Direct link to Command" translate="no"></a></h3>
|
|
107
107
|
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token plain">sam eval <PATH> [OPTIONS]</span><br></span></code></pre></div></div>
|
|
108
108
|
<p>The command takes the path to the evaluation test suite <code>JSON</code> file as a required argument.</p>
|
|
109
|
-
<h3 class="anchor
|
|
109
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="options">Options<a href="#options" class="hash-link" aria-label="Direct link to Options" title="Direct link to Options" translate="no"></a></h3>
|
|
110
110
|
<ul>
|
|
111
|
-
<li><code>-v</code>, <code>--verbose</code>: Enable verbose output to see detailed logs during the evaluation run.</li>
|
|
112
|
-
<li><code>-h</code>, <code>--help</code>: Show a help message with information about the command and its options.</li>
|
|
111
|
+
<li class=""><code>-v</code>, <code>--verbose</code>: Enable verbose output to see detailed logs during the evaluation run.</li>
|
|
112
|
+
<li class=""><code>-h</code>, <code>--help</code>: Show a help message with information about the command and its options.</li>
|
|
113
113
|
</ul>
|
|
114
|
-
<h3 class="anchor
|
|
114
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="example">Example<a href="#example" class="hash-link" aria-label="Direct link to Example" title="Direct link to Example" translate="no"></a></h3>
|
|
115
115
|
<div class="language-bash codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-bash codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token plain">sam eval tests/evaluation/local_example.json --verbose</span><br></span></code></pre></div></div>
|
|
116
|
-
<h2 class="anchor
|
|
116
|
+
<h2 class="anchor anchorTargetStickyNavbar_Vzrq" id="interpreting-the-results">Interpreting the Results<a href="#interpreting-the-results" class="hash-link" aria-label="Direct link to Interpreting the Results" title="Direct link to Interpreting the Results" translate="no"></a></h2>
|
|
117
117
|
<p>After an evaluation run is complete, the framework stores the results in a directory. The path to this directory is <code>results/</code> followed by the <code>results_dir_name</code> you specified in the test suite.</p>
|
|
118
|
-
<h3 class="anchor
|
|
118
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="results-directory">Results Directory<a href="#results-directory" class="hash-link" aria-label="Direct link to Results Directory" title="Direct link to Results Directory" translate="no"></a></h3>
|
|
119
119
|
<p>The results directory has the following structure:</p>
|
|
120
120
|
<div class="language-text codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#F8F8F2;--prism-background-color:#282A36"><div class="codeBlockContent_QJqH"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#F8F8F2;background-color:#282A36"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#F8F8F2"><span class="token plain"><results_dir_name>/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">├── report.html</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">├── stats.json</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain">└── <model_name>/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ├── full_messages.json</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ├── results.json</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> └── <test_case_id>/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> └── run_1/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ├── messages.json</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ├── summary.json</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> └── test_case_info.json</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> └── run_2/</span><br></span><span class="token-line" style="color:#F8F8F2"><span class="token plain"> ├── ...</span><br></span></code></pre></div></div>
|
|
121
121
|
<ul>
|
|
122
|
-
<li><strong><code>report.html</code></strong>: An <code>HTML</code> report that provides a comprehensive overview of the evaluation results. It includes a summary of the test runs, a breakdown of the results for each test case, and detailed logs for each test run. This report is the primary tool for analyzing the results of an evaluation.</li>
|
|
123
|
-
<li><strong><code>stats.json</code></strong>: A <code>JSON</code> file containing detailed statistics about the evaluation run, including scores for each evaluation metric.</li>
|
|
124
|
-
<li><strong><code><model_name>/</code></strong>: A directory for each language model tested (or a single <code>remote</code> directory for remote evaluations).<!-- -->
|
|
122
|
+
<li class=""><strong><code>report.html</code></strong>: An <code>HTML</code> report that provides a comprehensive overview of the evaluation results. It includes a summary of the test runs, a breakdown of the results for each test case, and detailed logs for each test run. This report is the primary tool for analyzing the results of an evaluation.</li>
|
|
123
|
+
<li class=""><strong><code>stats.json</code></strong>: A <code>JSON</code> file containing detailed statistics about the evaluation run, including scores for each evaluation metric.</li>
|
|
124
|
+
<li class=""><strong><code><model_name>/</code></strong>: A directory for each language model tested (or a single <code>remote</code> directory for remote evaluations).<!-- -->
|
|
125
125
|
<ul>
|
|
126
|
-
<li><strong><code>full_messages.json</code></strong>: A log of all messages exchanged during the evaluation for that model.</li>
|
|
127
|
-
<li><strong><code>results.json</code></strong>: The raw evaluation results for each test case.</li>
|
|
128
|
-
<li><strong><code><test_case_id>/</code></strong>: A directory for each test case, containing a <code>run_n</code> subdirectory for each run of the test case. These directories contain detailed logs and artifacts for each run.</li>
|
|
126
|
+
<li class=""><strong><code>full_messages.json</code></strong>: A log of all messages exchanged during the evaluation for that model.</li>
|
|
127
|
+
<li class=""><strong><code>results.json</code></strong>: The raw evaluation results for each test case.</li>
|
|
128
|
+
<li class=""><strong><code><test_case_id>/</code></strong>: A directory for each test case, containing a <code>run_n</code> subdirectory for each run of the test case. These directories contain detailed logs and artifacts for each run.</li>
|
|
129
129
|
</ul>
|
|
130
130
|
</li>
|
|
131
131
|
</ul>
|
|
132
|
-
<h3 class="anchor
|
|
133
|
-
<p>The <code>report.html</code> file provides a comprehensive overview of the evaluation results. It includes a summary of the test runs, a breakdown of the results for each test case, and detailed logs for each test run. This report is the primary tool for analyzing the results of an evaluation. You can open this file in a web browser to view the report.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="row margin-top--sm theme-doc-footer-edit-meta-row"><div class="col"><a href="https://github.com/SolaceLabs/solace-agent-mesh/edit/main/docs/docs/documentation/developing/evaluations.md" target="_blank" rel="noopener noreferrer" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_JAkA"></div></div></footer></article><nav class="docusaurus-mt-lg pagination-nav" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/solace-agent-mesh/docs/documentation/developing/creating-service-providers"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Creating Service Providers</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/solace-agent-mesh/docs/documentation/developing/tutorials/custom-agent"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Build Your Own Agent</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#creating-a-test-case" class="table-of-contents__link toc-highlight">Creating a Test Case</a><ul><li><a href="#test-case-configuration" class="table-of-contents__link toc-highlight">Test Case Configuration</a></li><li><a href="#test-case-examples" class="table-of-contents__link toc-highlight">Test Case Examples</a></li></ul></li><li><a href="#creating-a-test-suite" class="table-of-contents__link toc-highlight">Creating a Test Suite</a><ul><li><a href="#local-evaluation" class="table-of-contents__link toc-highlight">Local Evaluation</a></li><li><a href="#remote-evaluation" class="table-of-contents__link toc-highlight">Remote Evaluation</a></li></ul></li><li><a href="#evaluation-settings" class="table-of-contents__link toc-highlight">Evaluation Settings</a><ul><li><a href="#tool_match" class="table-of-contents__link toc-highlight"><code>tool_match</code></a></li><li><a href="#response_match" class="table-of-contents__link toc-highlight"><code>response_match</code></a></li><li><a href="#llm_evaluator" class="table-of-contents__link toc-highlight"><code>llm_evaluator</code></a></li></ul></li><li><a href="#running-evaluations" class="table-of-contents__link toc-highlight">Running Evaluations</a><ul><li><a href="#command" class="table-of-contents__link toc-highlight">Command</a></li><li><a href="#options" class="table-of-contents__link toc-highlight">Options</a></li><li><a href="#example" class="table-of-contents__link toc-highlight">Example</a></li></ul></li><li><a href="#interpreting-the-results" class="table-of-contents__link toc-highlight">Interpreting the Results</a><ul><li><a href="#results-directory" class="table-of-contents__link toc-highlight">Results Directory</a></li><li><a href="#html-report" class="table-of-contents__link toc-highlight">HTML Report</a></li></ul></li></ul></div></div></div></div></main></div></div></div><footer class="theme-layout-footer footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="theme-layout-footer-column col footer__col"><div class="footer__title">Solace Agent Mesh</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/solace-agent-mesh/docs/documentation/getting-started">Documentation</a></li><li class="footer__item"><a href="https://github.com/SolaceLabs/solace-agent-mesh/" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-
|
|
132
|
+
<h3 class="anchor anchorTargetStickyNavbar_Vzrq" id="html-report">HTML Report<a href="#html-report" class="hash-link" aria-label="Direct link to HTML Report" title="Direct link to HTML Report" translate="no"></a></h3>
|
|
133
|
+
<p>The <code>report.html</code> file provides a comprehensive overview of the evaluation results. It includes a summary of the test runs, a breakdown of the results for each test case, and detailed logs for each test run. This report is the primary tool for analyzing the results of an evaluation. You can open this file in a web browser to view the report.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="row margin-top--sm theme-doc-footer-edit-meta-row"><div class="col noPrint_WFHX"><a href="https://github.com/SolaceLabs/solace-agent-mesh/edit/main/docs/docs/documentation/developing/evaluations.md" target="_blank" rel="noopener noreferrer" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_Z9Sw" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_JAkA"></div></div></footer></article><nav class="docusaurus-mt-lg pagination-nav" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/solace-agent-mesh/docs/documentation/developing/creating-service-providers"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Creating Service Providers</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/solace-agent-mesh/docs/documentation/developing/tutorials/custom-agent"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Build Your Own Agent</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#creating-a-test-case" class="table-of-contents__link toc-highlight">Creating a Test Case</a><ul><li><a href="#test-case-configuration" class="table-of-contents__link toc-highlight">Test Case Configuration</a></li><li><a href="#test-case-examples" class="table-of-contents__link toc-highlight">Test Case Examples</a></li></ul></li><li><a href="#creating-a-test-suite" class="table-of-contents__link toc-highlight">Creating a Test Suite</a><ul><li><a href="#local-evaluation" class="table-of-contents__link toc-highlight">Local Evaluation</a></li><li><a href="#remote-evaluation" class="table-of-contents__link toc-highlight">Remote Evaluation</a></li></ul></li><li><a href="#evaluation-settings" class="table-of-contents__link toc-highlight">Evaluation Settings</a><ul><li><a href="#tool_match" class="table-of-contents__link toc-highlight"><code>tool_match</code></a></li><li><a href="#response_match" class="table-of-contents__link toc-highlight"><code>response_match</code></a></li><li><a href="#llm_evaluator" class="table-of-contents__link toc-highlight"><code>llm_evaluator</code></a></li></ul></li><li><a href="#running-evaluations" class="table-of-contents__link toc-highlight">Running Evaluations</a><ul><li><a href="#command" class="table-of-contents__link toc-highlight">Command</a></li><li><a href="#options" class="table-of-contents__link toc-highlight">Options</a></li><li><a href="#example" class="table-of-contents__link toc-highlight">Example</a></li></ul></li><li><a href="#interpreting-the-results" class="table-of-contents__link toc-highlight">Interpreting the Results</a><ul><li><a href="#results-directory" class="table-of-contents__link toc-highlight">Results Directory</a></li><li><a href="#html-report" class="table-of-contents__link toc-highlight">HTML Report</a></li></ul></li></ul></div></div></div></div></main></div></div></div><footer class="theme-layout-footer footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="theme-layout-footer-column col footer__col"><div class="footer__title">Solace Agent Mesh</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/solace-agent-mesh/docs/documentation/getting-started">Documentation</a></li><li class="footer__item"><a href="https://github.com/SolaceLabs/solace-agent-mesh/" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a></li><li class="footer__item"><a href="https://github.com/SolaceLabs/solace-agent-mesh-core-plugins/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Official Plugins<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a></li></ul></div><div class="theme-layout-footer-column col footer__col"><div class="footer__title">Company</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://solace.com/products/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Products<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a></li><li class="footer__item"><a href="https://solace.com/contact/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Contact<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a></li><li class="footer__item"><a href="https://solace.com/support/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Support<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a></li><li class="footer__item"><a href="https://solace.com/legal/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Privacy and Legal<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a></li></ul></div><div class="theme-layout-footer-column col footer__col"><div class="footer__title">Community</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://www.linkedin.com/company/solacedotcom/" target="_blank" rel="noopener noreferrer" class="footer__link-item">LinkedIn<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a></li><li class="footer__item"><a href="https://github.com/SolaceLabs" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a></li><li class="footer__item"><a href="https://www.youtube.com/SolaceSystems" target="_blank" rel="noopener noreferrer" class="footer__link-item">YouTube<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a></li><li class="footer__item"><a href="https://twitter.com/solacedotcom" target="_blank" rel="noopener noreferrer" class="footer__link-item">X<svg width="13.5" height="13.5" aria-label="(opens in new tab)" class="iconExternalLink_nPIU"><use href="#theme-svg-external-link"></use></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><img src="/solace-agent-mesh/img/solace-logo.png" alt="Solace Logo" class="footer__logo themedComponent_mlkZ themedComponent--light_NVdE" width="10%" height="10%"><img src="/solace-agent-mesh/img/solace-logo.png" alt="Solace Logo" class="footer__logo themedComponent_mlkZ themedComponent--dark_xIcU" width="10%" height="10%"></div><div class="footer__copyright">Solace Agent Mesh. Copyright © 2026 Solace. Version: 1.13.2</div></div></div></footer></div>
|
|
134
134
|
</body>
|
|
135
135
|
</html>
|