solace-agent-mesh 0.2.4__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of solace-agent-mesh might be problematic. Click here for more details.

Files changed (521)
  1. solace_agent_mesh/__init__.py +5 -0
  2. solace_agent_mesh/agent/adk/adk_llm.txt +93 -0
  3. solace_agent_mesh/agent/adk/app_llm_agent.py +26 -0
  4. solace_agent_mesh/agent/adk/callbacks.py +1716 -0
  5. solace_agent_mesh/agent/adk/filesystem_artifact_service.py +381 -0
  6. solace_agent_mesh/agent/adk/invocation_monitor.py +295 -0
  7. solace_agent_mesh/agent/adk/models/lite_llm.py +872 -0
  8. solace_agent_mesh/agent/adk/models/models_llm.txt +94 -0
  9. solace_agent_mesh/agent/adk/runner.py +357 -0
  10. solace_agent_mesh/agent/adk/services.py +240 -0
  11. solace_agent_mesh/agent/adk/setup.py +751 -0
  12. solace_agent_mesh/agent/adk/stream_parser.py +214 -0
  13. solace_agent_mesh/agent/adk/tool_wrapper.py +139 -0
  14. solace_agent_mesh/agent/agent_llm.txt +41 -0
  15. solace_agent_mesh/agent/protocol/event_handlers.py +1444 -0
  16. solace_agent_mesh/agent/protocol/protocol_llm.txt +21 -0
  17. solace_agent_mesh/agent/sac/app.py +640 -0
  18. solace_agent_mesh/agent/sac/component.py +3496 -0
  19. solace_agent_mesh/agent/sac/patch_adk.py +111 -0
  20. solace_agent_mesh/agent/sac/sac_llm.txt +105 -0
  21. solace_agent_mesh/agent/sac/task_execution_context.py +185 -0
  22. solace_agent_mesh/agent/testing/__init__.py +3 -0
  23. solace_agent_mesh/agent/testing/debug_utils.py +135 -0
  24. solace_agent_mesh/agent/testing/testing_llm.txt +90 -0
  25. solace_agent_mesh/agent/tools/__init__.py +14 -0
  26. solace_agent_mesh/agent/tools/audio_tools.py +1622 -0
  27. solace_agent_mesh/agent/tools/builtin_artifact_tools.py +1954 -0
  28. solace_agent_mesh/agent/tools/builtin_data_analysis_tools.py +238 -0
  29. solace_agent_mesh/agent/tools/general_agent_tools.py +571 -0
  30. solace_agent_mesh/agent/tools/image_tools.py +1184 -0
  31. solace_agent_mesh/agent/tools/peer_agent_tool.py +290 -0
  32. solace_agent_mesh/agent/tools/registry.py +36 -0
  33. solace_agent_mesh/agent/tools/test_tools.py +135 -0
  34. solace_agent_mesh/agent/tools/tool_definition.py +45 -0
  35. solace_agent_mesh/agent/tools/tools_llm.txt +104 -0
  36. solace_agent_mesh/agent/tools/web_tools.py +381 -0
  37. solace_agent_mesh/agent/utils/artifact_helpers.py +927 -0
  38. solace_agent_mesh/agent/utils/config_parser.py +47 -0
  39. solace_agent_mesh/agent/utils/context_helpers.py +60 -0
  40. solace_agent_mesh/agent/utils/utils_llm.txt +153 -0
  41. solace_agent_mesh/assets/docs/404.html +16 -0
  42. solace_agent_mesh/assets/docs/assets/css/styles.906a1503.css +1 -0
  43. solace_agent_mesh/assets/docs/assets/images/Solace_AI_Framework_With_Broker-85f0a306a9bcdd20b390b7a949f6d862.png +0 -0
  44. solace_agent_mesh/assets/docs/assets/images/sac-flows-80d5b603c6aafd33e87945680ce0abf3.png +0 -0
  45. solace_agent_mesh/assets/docs/assets/images/sac_parts_of_a_component-cb3d0424b1d0c17734c5435cca6b4082.png +0 -0
  46. solace_agent_mesh/assets/docs/assets/js/04989206.674a8007.js +1 -0
  47. solace_agent_mesh/assets/docs/assets/js/0e682baa.79f0ab22.js +1 -0
  48. solace_agent_mesh/assets/docs/assets/js/1001.0182a8bd.js +1 -0
  49. solace_agent_mesh/assets/docs/assets/js/1023fc19.015679ca.js +1 -0
  50. solace_agent_mesh/assets/docs/assets/js/1039.0bd46aa1.js +1 -0
  51. solace_agent_mesh/assets/docs/assets/js/149.b797a808.js +1 -0
  52. solace_agent_mesh/assets/docs/assets/js/1523c6b4.91c7bc01.js +1 -0
  53. solace_agent_mesh/assets/docs/assets/js/165.6a39807d.js +2 -0
  54. solace_agent_mesh/assets/docs/assets/js/165.6a39807d.js.LICENSE.txt +9 -0
  55. solace_agent_mesh/assets/docs/assets/js/166ab619.7d97ccaf.js +1 -0
  56. solace_agent_mesh/assets/docs/assets/js/17896441.a5e82f9b.js +2 -0
  57. solace_agent_mesh/assets/docs/assets/js/17896441.a5e82f9b.js.LICENSE.txt +7 -0
  58. solace_agent_mesh/assets/docs/assets/js/1c6e87d2.a8c5ce5a.js +1 -0
  59. solace_agent_mesh/assets/docs/assets/js/2130.ab9fd314.js +1 -0
  60. solace_agent_mesh/assets/docs/assets/js/21ceee5f.614fa8dd.js +1 -0
  61. solace_agent_mesh/assets/docs/assets/js/2237.5e477fc6.js +1 -0
  62. solace_agent_mesh/assets/docs/assets/js/2334.622a6395.js +1 -0
  63. solace_agent_mesh/assets/docs/assets/js/2a9cab12.8909df92.js +1 -0
  64. solace_agent_mesh/assets/docs/assets/js/3219.adc1d663.js +1 -0
  65. solace_agent_mesh/assets/docs/assets/js/332e10b5.7a103f42.js +1 -0
  66. solace_agent_mesh/assets/docs/assets/js/3624.b524e433.js +1 -0
  67. solace_agent_mesh/assets/docs/assets/js/375.708d48db.js +1 -0
  68. solace_agent_mesh/assets/docs/assets/js/3834.b6cd790e.js +1 -0
  69. solace_agent_mesh/assets/docs/assets/js/3d406171.f722eaf5.js +1 -0
  70. solace_agent_mesh/assets/docs/assets/js/4250.95455b28.js +1 -0
  71. solace_agent_mesh/assets/docs/assets/js/42b3f8d8.36090198.js +1 -0
  72. solace_agent_mesh/assets/docs/assets/js/4356.d169ab5b.js +1 -0
  73. solace_agent_mesh/assets/docs/assets/js/442a8107.5ba94b65.js +1 -0
  74. solace_agent_mesh/assets/docs/assets/js/4458.518e66fa.js +1 -0
  75. solace_agent_mesh/assets/docs/assets/js/4488.c7cc3442.js +1 -0
  76. solace_agent_mesh/assets/docs/assets/js/4494.6ee23046.js +1 -0
  77. solace_agent_mesh/assets/docs/assets/js/4855.fc4444b6.js +1 -0
  78. solace_agent_mesh/assets/docs/assets/js/4866.22daefc0.js +1 -0
  79. solace_agent_mesh/assets/docs/assets/js/4950.ca4caeda.js +1 -0
  80. solace_agent_mesh/assets/docs/assets/js/4c2787c2.66ee00e9.js +1 -0
  81. solace_agent_mesh/assets/docs/assets/js/5388.7a136447.js +1 -0
  82. solace_agent_mesh/assets/docs/assets/js/55f47984.c484bf96.js +1 -0
  83. solace_agent_mesh/assets/docs/assets/js/5607.081356f8.js +1 -0
  84. solace_agent_mesh/assets/docs/assets/js/5864.b0d0e9de.js +1 -0
  85. solace_agent_mesh/assets/docs/assets/js/5b4258a4.bda20761.js +1 -0
  86. solace_agent_mesh/assets/docs/assets/js/5e95c892.558d5167.js +1 -0
  87. solace_agent_mesh/assets/docs/assets/js/6143.0a1464c9.js +1 -0
  88. solace_agent_mesh/assets/docs/assets/js/6395.e9c73649.js +1 -0
  89. solace_agent_mesh/assets/docs/assets/js/6796.51d2c9b7.js +1 -0
  90. solace_agent_mesh/assets/docs/assets/js/6976.379be23b.js +1 -0
  91. solace_agent_mesh/assets/docs/assets/js/6978.ee0b945c.js +1 -0
  92. solace_agent_mesh/assets/docs/assets/js/7040.cb436723.js +1 -0
  93. solace_agent_mesh/assets/docs/assets/js/7195.412f418a.js +1 -0
  94. solace_agent_mesh/assets/docs/assets/js/7280.3fb73bdb.js +1 -0
  95. solace_agent_mesh/assets/docs/assets/js/768e31b0.a12673db.js +1 -0
  96. solace_agent_mesh/assets/docs/assets/js/7845.e33e7c4c.js +1 -0
  97. solace_agent_mesh/assets/docs/assets/js/7900.69516146.js +1 -0
  98. solace_agent_mesh/assets/docs/assets/js/8356.8a379c04.js +1 -0
  99. solace_agent_mesh/assets/docs/assets/js/85387663.6bf41934.js +1 -0
  100. solace_agent_mesh/assets/docs/assets/js/8567.4732c6b7.js +1 -0
  101. solace_agent_mesh/assets/docs/assets/js/8573.cb04eda5.js +1 -0
  102. solace_agent_mesh/assets/docs/assets/js/8577.1d54e766.js +1 -0
  103. solace_agent_mesh/assets/docs/assets/js/8591.d7c16be6.js +2 -0
  104. solace_agent_mesh/assets/docs/assets/js/8591.d7c16be6.js.LICENSE.txt +61 -0
  105. solace_agent_mesh/assets/docs/assets/js/8709.7ecd4047.js +1 -0
  106. solace_agent_mesh/assets/docs/assets/js/8731.49e930c2.js +1 -0
  107. solace_agent_mesh/assets/docs/assets/js/8908.f9d1b506.js +1 -0
  108. solace_agent_mesh/assets/docs/assets/js/9157.b4093d07.js +1 -0
  109. solace_agent_mesh/assets/docs/assets/js/9278.a4fd875d.js +1 -0
  110. solace_agent_mesh/assets/docs/assets/js/945fb41e.74d728aa.js +1 -0
  111. solace_agent_mesh/assets/docs/assets/js/9616.b75c2f6d.js +1 -0
  112. solace_agent_mesh/assets/docs/assets/js/9793.c6d16376.js +1 -0
  113. solace_agent_mesh/assets/docs/assets/js/9eff14a2.1bf8f61c.js +1 -0
  114. solace_agent_mesh/assets/docs/assets/js/a3a92b25.26ca071f.js +1 -0
  115. solace_agent_mesh/assets/docs/assets/js/a7bd4aaa.2204d2f7.js +1 -0
  116. solace_agent_mesh/assets/docs/assets/js/a94703ab.0438dbc2.js +1 -0
  117. solace_agent_mesh/assets/docs/assets/js/aba21aa0.c42a534c.js +1 -0
  118. solace_agent_mesh/assets/docs/assets/js/aba87c2f.d3e2dcc3.js +1 -0
  119. solace_agent_mesh/assets/docs/assets/js/ae4415af.8e279b5d.js +1 -0
  120. solace_agent_mesh/assets/docs/assets/js/b7006a3a.40b10c9d.js +1 -0
  121. solace_agent_mesh/assets/docs/assets/js/bac0be12.f50d9bac.js +1 -0
  122. solace_agent_mesh/assets/docs/assets/js/bb2ef573.207e6990.js +1 -0
  123. solace_agent_mesh/assets/docs/assets/js/c2c06897.63b76e9e.js +1 -0
  124. solace_agent_mesh/assets/docs/assets/js/cc969b05.954186d4.js +1 -0
  125. solace_agent_mesh/assets/docs/assets/js/cd3d4052.ca6eed8c.js +1 -0
  126. solace_agent_mesh/assets/docs/assets/js/ced92a13.fb92e7ca.js +1 -0
  127. solace_agent_mesh/assets/docs/assets/js/cee5d587.f5b73ca1.js +1 -0
  128. solace_agent_mesh/assets/docs/assets/js/f284c35a.ecc3d195.js +1 -0
  129. solace_agent_mesh/assets/docs/assets/js/f897a61a.f8c53b0f.js +1 -0
  130. solace_agent_mesh/assets/docs/assets/js/fbfa3e75.aca209c9.js +1 -0
  131. solace_agent_mesh/assets/docs/assets/js/main.c6286d7c.js +2 -0
  132. solace_agent_mesh/assets/docs/assets/js/main.c6286d7c.js.LICENSE.txt +81 -0
  133. solace_agent_mesh/assets/docs/assets/js/runtime~main.d5133813.js +1 -0
  134. solace_agent_mesh/assets/docs/docs/documentation/concepts/agents/index.html +128 -0
  135. solace_agent_mesh/assets/docs/docs/documentation/concepts/architecture/index.html +91 -0
  136. solace_agent_mesh/assets/docs/docs/documentation/concepts/cli/index.html +201 -0
  137. solace_agent_mesh/assets/docs/docs/documentation/concepts/gateways/index.html +91 -0
  138. solace_agent_mesh/assets/docs/docs/documentation/concepts/orchestrator/index.html +55 -0
  139. solace_agent_mesh/assets/docs/docs/documentation/concepts/plugins/index.html +82 -0
  140. solace_agent_mesh/assets/docs/docs/documentation/deployment/debugging/index.html +77 -0
  141. solace_agent_mesh/assets/docs/docs/documentation/deployment/deploy/index.html +48 -0
  142. solace_agent_mesh/assets/docs/docs/documentation/deployment/observability/index.html +54 -0
  143. solace_agent_mesh/assets/docs/docs/documentation/enterprise/index.html +17 -0
  144. solace_agent_mesh/assets/docs/docs/documentation/getting-started/component-overview/index.html +45 -0
  145. solace_agent_mesh/assets/docs/docs/documentation/getting-started/installation/index.html +76 -0
  146. solace_agent_mesh/assets/docs/docs/documentation/getting-started/introduction/index.html +150 -0
  147. solace_agent_mesh/assets/docs/docs/documentation/getting-started/quick-start/index.html +54 -0
  148. solace_agent_mesh/assets/docs/docs/documentation/tutorials/bedrock-agents/index.html +267 -0
  149. solace_agent_mesh/assets/docs/docs/documentation/tutorials/custom-agent/index.html +136 -0
  150. solace_agent_mesh/assets/docs/docs/documentation/tutorials/event-mesh-gateway/index.html +116 -0
  151. solace_agent_mesh/assets/docs/docs/documentation/tutorials/mcp-integration/index.html +80 -0
  152. solace_agent_mesh/assets/docs/docs/documentation/tutorials/mongodb-integration/index.html +164 -0
  153. solace_agent_mesh/assets/docs/docs/documentation/tutorials/rest-gateway/index.html +57 -0
  154. solace_agent_mesh/assets/docs/docs/documentation/tutorials/slack-integration/index.html +72 -0
  155. solace_agent_mesh/assets/docs/docs/documentation/tutorials/sql-database/index.html +102 -0
  156. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/artifact-management/index.html +99 -0
  157. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/audio-tools/index.html +90 -0
  158. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/data-analysis-tools/index.html +107 -0
  159. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/embeds/index.html +152 -0
  160. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/index.html +103 -0
  161. solace_agent_mesh/assets/docs/docs/documentation/user-guide/create-agents/index.html +170 -0
  162. solace_agent_mesh/assets/docs/docs/documentation/user-guide/create-gateways/index.html +200 -0
  163. solace_agent_mesh/assets/docs/docs/documentation/user-guide/creating-service-providers/index.html +54 -0
  164. solace_agent_mesh/assets/docs/docs/documentation/user-guide/solace-ai-connector/index.html +69 -0
  165. solace_agent_mesh/assets/docs/docs/documentation/user-guide/structure/index.html +59 -0
  166. solace_agent_mesh/assets/docs/img/Solace_AI_Framework_README.png +0 -0
  167. solace_agent_mesh/assets/docs/img/Solace_AI_Framework_With_Broker.png +0 -0
  168. solace_agent_mesh/assets/docs/img/logo.png +0 -0
  169. solace_agent_mesh/assets/docs/img/sac-flows.png +0 -0
  170. solace_agent_mesh/assets/docs/img/sac_parts_of_a_component.png +0 -0
  171. solace_agent_mesh/assets/docs/img/solace-logo.png +0 -0
  172. solace_agent_mesh/assets/docs/lunr-index-1754075282978.json +1 -0
  173. solace_agent_mesh/assets/docs/lunr-index.json +1 -0
  174. solace_agent_mesh/assets/docs/search-doc-1754075282978.json +1 -0
  175. solace_agent_mesh/assets/docs/search-doc.json +1 -0
  176. solace_agent_mesh/assets/docs/sitemap.xml +1 -0
  177. solace_agent_mesh/cli/__init__.py +1 -1
  178. solace_agent_mesh/cli/commands/add_cmd/__init__.py +15 -0
  179. solace_agent_mesh/cli/commands/add_cmd/add_cmd_llm.txt +250 -0
  180. solace_agent_mesh/cli/commands/add_cmd/agent_cmd.py +659 -0
  181. solace_agent_mesh/cli/commands/add_cmd/gateway_cmd.py +322 -0
  182. solace_agent_mesh/cli/commands/add_cmd/web_add_agent_step.py +93 -0
  183. solace_agent_mesh/cli/commands/add_cmd/web_add_gateway_step.py +118 -0
  184. solace_agent_mesh/cli/commands/docs_cmd.py +57 -0
  185. solace_agent_mesh/cli/commands/eval_cmd.py +64 -0
  186. solace_agent_mesh/cli/commands/init_cmd/__init__.py +404 -0
  187. solace_agent_mesh/cli/commands/init_cmd/broker_step.py +201 -0
  188. solace_agent_mesh/cli/commands/init_cmd/directory_step.py +28 -0
  189. solace_agent_mesh/cli/commands/init_cmd/env_step.py +205 -0
  190. solace_agent_mesh/cli/commands/init_cmd/init_cmd_llm.txt +365 -0
  191. solace_agent_mesh/cli/commands/init_cmd/orchestrator_step.py +407 -0
  192. solace_agent_mesh/cli/commands/init_cmd/project_files_step.py +38 -0
  193. solace_agent_mesh/cli/commands/init_cmd/web_init_step.py +110 -0
  194. solace_agent_mesh/cli/commands/init_cmd/webui_gateway_step.py +183 -0
  195. solace_agent_mesh/cli/commands/plugin_cmd/__init__.py +18 -0
  196. solace_agent_mesh/cli/commands/plugin_cmd/add_cmd.py +372 -0
  197. solace_agent_mesh/cli/commands/plugin_cmd/build_cmd.py +86 -0
  198. solace_agent_mesh/cli/commands/plugin_cmd/catalog_cmd.py +139 -0
  199. solace_agent_mesh/cli/commands/plugin_cmd/create_cmd.py +309 -0
  200. solace_agent_mesh/cli/commands/plugin_cmd/official_registry.py +175 -0
  201. solace_agent_mesh/cli/commands/plugin_cmd/plugin_cmd_llm.txt +305 -0
  202. solace_agent_mesh/cli/commands/run_cmd.py +158 -0
  203. solace_agent_mesh/cli/main.py +17 -294
  204. solace_agent_mesh/cli/utils.py +135 -204
  205. solace_agent_mesh/client/webui/frontend/static/assets/authCallback-DvlO62me.js +1 -0
  206. solace_agent_mesh/client/webui/frontend/static/assets/client-bp6u3qVZ.js +49 -0
  207. solace_agent_mesh/client/webui/frontend/static/assets/favicon-BLgzUch9.ico +0 -0
  208. solace_agent_mesh/client/webui/frontend/static/assets/main-D11Lmy9p.css +1 -0
  209. solace_agent_mesh/client/webui/frontend/static/assets/main-Gfk3BYn5.js +663 -0
  210. solace_agent_mesh/client/webui/frontend/static/auth-callback.html +14 -0
  211. solace_agent_mesh/client/webui/frontend/static/index.html +15 -0
  212. solace_agent_mesh/common/__init__.py +1 -0
  213. solace_agent_mesh/common/a2a_protocol.py +564 -0
  214. solace_agent_mesh/common/agent_registry.py +42 -0
  215. solace_agent_mesh/common/client/__init__.py +4 -0
  216. solace_agent_mesh/common/client/card_resolver.py +21 -0
  217. solace_agent_mesh/common/client/client.py +85 -0
  218. solace_agent_mesh/common/client/client_llm.txt +133 -0
  219. solace_agent_mesh/common/common_llm.txt +144 -0
  220. solace_agent_mesh/common/constants.py +1 -14
  221. solace_agent_mesh/common/middleware/__init__.py +12 -0
  222. solace_agent_mesh/common/middleware/config_resolver.py +130 -0
  223. solace_agent_mesh/common/middleware/middleware_llm.txt +174 -0
  224. solace_agent_mesh/common/middleware/registry.py +125 -0
  225. solace_agent_mesh/common/server/__init__.py +4 -0
  226. solace_agent_mesh/common/server/server.py +122 -0
  227. solace_agent_mesh/common/server/server_llm.txt +169 -0
  228. solace_agent_mesh/common/server/task_manager.py +291 -0
  229. solace_agent_mesh/common/server/utils.py +28 -0
  230. solace_agent_mesh/common/services/__init__.py +4 -0
  231. solace_agent_mesh/common/services/employee_service.py +162 -0
  232. solace_agent_mesh/common/services/identity_service.py +129 -0
  233. solace_agent_mesh/common/services/providers/__init__.py +4 -0
  234. solace_agent_mesh/common/services/providers/local_file_identity_service.py +148 -0
  235. solace_agent_mesh/common/services/providers/providers_llm.txt +113 -0
  236. solace_agent_mesh/common/services/services_llm.txt +132 -0
  237. solace_agent_mesh/common/types.py +411 -0
  238. solace_agent_mesh/common/utils/__init__.py +7 -0
  239. solace_agent_mesh/common/utils/asyncio_macos_fix.py +86 -0
  240. solace_agent_mesh/common/utils/embeds/__init__.py +33 -0
  241. solace_agent_mesh/common/utils/embeds/constants.py +55 -0
  242. solace_agent_mesh/common/utils/embeds/converter.py +452 -0
  243. solace_agent_mesh/common/utils/embeds/embeds_llm.txt +124 -0
  244. solace_agent_mesh/common/utils/embeds/evaluators.py +394 -0
  245. solace_agent_mesh/common/utils/embeds/modifiers.py +816 -0
  246. solace_agent_mesh/common/utils/embeds/resolver.py +865 -0
  247. solace_agent_mesh/common/utils/embeds/types.py +14 -0
  248. solace_agent_mesh/common/utils/in_memory_cache.py +108 -0
  249. solace_agent_mesh/common/utils/initializer.py +51 -0
  250. solace_agent_mesh/common/utils/log_formatters.py +44 -0
  251. solace_agent_mesh/common/utils/mime_helpers.py +106 -0
  252. solace_agent_mesh/common/utils/push_notification_auth.py +134 -0
  253. solace_agent_mesh/common/utils/utils_llm.txt +67 -0
  254. solace_agent_mesh/config_portal/backend/common.py +66 -24
  255. solace_agent_mesh/config_portal/backend/plugin_catalog/constants.py +24 -0
  256. solace_agent_mesh/config_portal/backend/plugin_catalog/models.py +49 -0
  257. solace_agent_mesh/config_portal/backend/plugin_catalog/registry_manager.py +164 -0
  258. solace_agent_mesh/config_portal/backend/plugin_catalog/scraper.py +521 -0
  259. solace_agent_mesh/config_portal/backend/plugin_catalog_server.py +217 -0
  260. solace_agent_mesh/config_portal/backend/server.py +551 -181
  261. solace_agent_mesh/config_portal/frontend/static/client/assets/_index-_7yox_eh.js +48 -0
  262. solace_agent_mesh/config_portal/frontend/static/client/assets/components-B7lKcHVY.js +140 -0
  263. solace_agent_mesh/config_portal/frontend/static/client/assets/{entry.client-DX1misIU.js → entry.client-CEumGClk.js} +3 -3
  264. solace_agent_mesh/config_portal/frontend/static/client/assets/index-DSo1AH_7.js +68 -0
  265. solace_agent_mesh/config_portal/frontend/static/client/assets/manifest-e5c3acfe.js +1 -0
  266. solace_agent_mesh/config_portal/frontend/static/client/assets/{root-BApq5dPK.js → root-C4XmHinv.js} +2 -2
  267. solace_agent_mesh/config_portal/frontend/static/client/assets/root-DxRwaWiE.css +1 -0
  268. solace_agent_mesh/config_portal/frontend/static/client/index.html +3 -3
  269. solace_agent_mesh/core_a2a/__init__.py +1 -0
  270. solace_agent_mesh/core_a2a/core_a2a_llm.txt +88 -0
  271. solace_agent_mesh/core_a2a/service.py +331 -0
  272. solace_agent_mesh/evaluation/config_loader.py +657 -0
  273. solace_agent_mesh/evaluation/evaluator.py +667 -0
  274. solace_agent_mesh/evaluation/message_organizer.py +568 -0
  275. solace_agent_mesh/evaluation/report/benchmark_info.html +35 -0
  276. solace_agent_mesh/evaluation/report/chart_section.html +141 -0
  277. solace_agent_mesh/evaluation/report/detailed_breakdown.html +28 -0
  278. solace_agent_mesh/evaluation/report/modal.html +59 -0
  279. solace_agent_mesh/evaluation/report/modal_chart_functions.js +411 -0
  280. solace_agent_mesh/evaluation/report/modal_script.js +296 -0
  281. solace_agent_mesh/evaluation/report/modal_styles.css +340 -0
  282. solace_agent_mesh/evaluation/report/performance_metrics_styles.css +93 -0
  283. solace_agent_mesh/evaluation/report/templates/footer.html +2 -0
  284. solace_agent_mesh/evaluation/report/templates/header.html +340 -0
  285. solace_agent_mesh/evaluation/report_data_processor.py +972 -0
  286. solace_agent_mesh/evaluation/report_generator.py +613 -0
  287. solace_agent_mesh/evaluation/run.py +613 -0
  288. solace_agent_mesh/evaluation/subscriber.py +872 -0
  289. solace_agent_mesh/evaluation/summary_builder.py +775 -0
  290. solace_agent_mesh/evaluation/test_case_loader.py +714 -0
  291. solace_agent_mesh/gateway/base/__init__.py +1 -0
  292. solace_agent_mesh/gateway/base/app.py +266 -0
  293. solace_agent_mesh/gateway/base/base_llm.txt +119 -0
  294. solace_agent_mesh/gateway/base/component.py +1542 -0
  295. solace_agent_mesh/gateway/base/task_context.py +74 -0
  296. solace_agent_mesh/gateway/gateway_llm.txt +125 -0
  297. solace_agent_mesh/gateway/http_sse/app.py +190 -0
  298. solace_agent_mesh/gateway/http_sse/component.py +1602 -0
  299. solace_agent_mesh/gateway/http_sse/components/__init__.py +7 -0
  300. solace_agent_mesh/gateway/http_sse/components/components_llm.txt +65 -0
  301. solace_agent_mesh/gateway/http_sse/components/visualization_forwarder_component.py +108 -0
  302. solace_agent_mesh/gateway/http_sse/dependencies.py +316 -0
  303. solace_agent_mesh/gateway/http_sse/http_sse_llm.txt +63 -0
  304. solace_agent_mesh/gateway/http_sse/main.py +442 -0
  305. solace_agent_mesh/gateway/http_sse/routers/__init__.py +4 -0
  306. solace_agent_mesh/gateway/http_sse/routers/agents.py +41 -0
  307. solace_agent_mesh/gateway/http_sse/routers/artifacts.py +827 -0
  308. solace_agent_mesh/gateway/http_sse/routers/auth.py +212 -0
  309. solace_agent_mesh/gateway/http_sse/routers/config.py +55 -0
  310. solace_agent_mesh/gateway/http_sse/routers/people.py +69 -0
  311. solace_agent_mesh/gateway/http_sse/routers/routers_llm.txt +37 -0
  312. solace_agent_mesh/gateway/http_sse/routers/sessions.py +80 -0
  313. solace_agent_mesh/gateway/http_sse/routers/sse.py +138 -0
  314. solace_agent_mesh/gateway/http_sse/routers/tasks.py +294 -0
  315. solace_agent_mesh/gateway/http_sse/routers/users.py +59 -0
  316. solace_agent_mesh/gateway/http_sse/routers/visualization.py +1131 -0
  317. solace_agent_mesh/gateway/http_sse/services/__init__.py +4 -0
  318. solace_agent_mesh/gateway/http_sse/services/agent_service.py +69 -0
  319. solace_agent_mesh/gateway/http_sse/services/people_service.py +158 -0
  320. solace_agent_mesh/gateway/http_sse/services/services_llm.txt +179 -0
  321. solace_agent_mesh/gateway/http_sse/services/task_service.py +121 -0
  322. solace_agent_mesh/gateway/http_sse/session_manager.py +187 -0
  323. solace_agent_mesh/gateway/http_sse/sse_manager.py +328 -0
  324. solace_agent_mesh/llm.txt +228 -0
  325. solace_agent_mesh/llm_detail.txt +2835 -0
  326. solace_agent_mesh/templates/agent_template.yaml +53 -0
  327. solace_agent_mesh/templates/eval_backend_template.yaml +54 -0
  328. solace_agent_mesh/templates/gateway_app_template.py +73 -0
  329. solace_agent_mesh/templates/gateway_component_template.py +431 -0
  330. solace_agent_mesh/templates/gateway_config_template.yaml +43 -0
  331. solace_agent_mesh/templates/logging_config_template.ini +64 -0
  332. solace_agent_mesh/templates/main_orchestrator.yaml +55 -0
  333. solace_agent_mesh/templates/plugin_agent_config_template.yaml +122 -0
  334. solace_agent_mesh/templates/plugin_custom_config_template.yaml +27 -0
  335. solace_agent_mesh/templates/plugin_custom_template.py +10 -0
  336. solace_agent_mesh/templates/plugin_gateway_config_template.yaml +63 -0
  337. solace_agent_mesh/templates/plugin_pyproject_template.toml +33 -0
  338. solace_agent_mesh/templates/plugin_readme_template.md +34 -0
  339. solace_agent_mesh/templates/plugin_tools_template.py +224 -0
  340. solace_agent_mesh/templates/shared_config.yaml +66 -0
  341. solace_agent_mesh/templates/templates_llm.txt +147 -0
  342. solace_agent_mesh/templates/webui.yaml +53 -0
  343. solace_agent_mesh-1.0.2.dist-info/METADATA +432 -0
  344. solace_agent_mesh-1.0.2.dist-info/RECORD +361 -0
  345. solace_agent_mesh-1.0.2.dist-info/entry_points.txt +3 -0
  346. {solace_agent_mesh-0.2.4.dist-info → solace_agent_mesh-1.0.2.dist-info}/licenses/LICENSE +1 -1
  347. solace_agent_mesh/agents/base_agent_component.py +0 -256
  348. solace_agent_mesh/agents/global/actions/agent_state_change.py +0 -54
  349. solace_agent_mesh/agents/global/actions/clear_history.py +0 -32
  350. solace_agent_mesh/agents/global/actions/convert_file_to_markdown.py +0 -160
  351. solace_agent_mesh/agents/global/actions/create_file.py +0 -70
  352. solace_agent_mesh/agents/global/actions/error_action.py +0 -45
  353. solace_agent_mesh/agents/global/actions/plantuml_diagram.py +0 -163
  354. solace_agent_mesh/agents/global/actions/plotly_graph.py +0 -152
  355. solace_agent_mesh/agents/global/actions/retrieve_file.py +0 -51
  356. solace_agent_mesh/agents/global/global_agent_component.py +0 -38
  357. solace_agent_mesh/agents/image_processing/actions/create_image.py +0 -75
  358. solace_agent_mesh/agents/image_processing/actions/describe_image.py +0 -115
  359. solace_agent_mesh/agents/image_processing/image_processing_agent_component.py +0 -23
  360. solace_agent_mesh/agents/slack/__init__.py +0 -1
  361. solace_agent_mesh/agents/slack/actions/__init__.py +0 -1
  362. solace_agent_mesh/agents/slack/actions/post_message.py +0 -177
  363. solace_agent_mesh/agents/slack/slack_agent_component.py +0 -59
  364. solace_agent_mesh/agents/web_request/actions/do_image_search.py +0 -84
  365. solace_agent_mesh/agents/web_request/actions/do_news_search.py +0 -47
  366. solace_agent_mesh/agents/web_request/actions/do_suggestion_search.py +0 -34
  367. solace_agent_mesh/agents/web_request/actions/do_web_request.py +0 -135
  368. solace_agent_mesh/agents/web_request/actions/download_file.py +0 -69
  369. solace_agent_mesh/agents/web_request/web_request_agent_component.py +0 -33
  370. solace_agent_mesh/assets/web-visualizer/assets/index-D0qORgkg.css +0 -1
  371. solace_agent_mesh/assets/web-visualizer/assets/index-DnDr1pnu.js +0 -109
  372. solace_agent_mesh/assets/web-visualizer/index.html +0 -14
  373. solace_agent_mesh/assets/web-visualizer/vite.svg +0 -1
  374. solace_agent_mesh/cli/commands/add/__init__.py +0 -3
  375. solace_agent_mesh/cli/commands/add/add.py +0 -88
  376. solace_agent_mesh/cli/commands/add/agent.py +0 -110
  377. solace_agent_mesh/cli/commands/add/copy_from_plugin.py +0 -92
  378. solace_agent_mesh/cli/commands/add/gateway.py +0 -374
  379. solace_agent_mesh/cli/commands/build.py +0 -670
  380. solace_agent_mesh/cli/commands/chat/__init__.py +0 -3
  381. solace_agent_mesh/cli/commands/chat/chat.py +0 -361
  382. solace_agent_mesh/cli/commands/config.py +0 -29
  383. solace_agent_mesh/cli/commands/init/__init__.py +0 -3
  384. solace_agent_mesh/cli/commands/init/ai_provider_step.py +0 -93
  385. solace_agent_mesh/cli/commands/init/broker_step.py +0 -99
  386. solace_agent_mesh/cli/commands/init/builtin_agent_step.py +0 -83
  387. solace_agent_mesh/cli/commands/init/check_if_already_done.py +0 -13
  388. solace_agent_mesh/cli/commands/init/create_config_file_step.py +0 -65
  389. solace_agent_mesh/cli/commands/init/create_other_project_files_step.py +0 -147
  390. solace_agent_mesh/cli/commands/init/file_service_step.py +0 -73
  391. solace_agent_mesh/cli/commands/init/init.py +0 -92
  392. solace_agent_mesh/cli/commands/init/project_structure_step.py +0 -16
  393. solace_agent_mesh/cli/commands/init/web_init_step.py +0 -32
  394. solace_agent_mesh/cli/commands/plugin/__init__.py +0 -3
  395. solace_agent_mesh/cli/commands/plugin/add.py +0 -100
  396. solace_agent_mesh/cli/commands/plugin/build.py +0 -268
  397. solace_agent_mesh/cli/commands/plugin/create.py +0 -117
  398. solace_agent_mesh/cli/commands/plugin/plugin.py +0 -124
  399. solace_agent_mesh/cli/commands/plugin/remove.py +0 -73
  400. solace_agent_mesh/cli/commands/run.py +0 -68
  401. solace_agent_mesh/cli/commands/visualizer.py +0 -138
  402. solace_agent_mesh/cli/config.py +0 -85
  403. solace_agent_mesh/common/action.py +0 -91
  404. solace_agent_mesh/common/action_list.py +0 -37
  405. solace_agent_mesh/common/action_response.py +0 -340
  406. solace_agent_mesh/common/mysql_database.py +0 -40
  407. solace_agent_mesh/common/postgres_database.py +0 -85
  408. solace_agent_mesh/common/prompt_templates.py +0 -28
  409. solace_agent_mesh/common/stimulus_utils.py +0 -152
  410. solace_agent_mesh/common/time.py +0 -24
  411. solace_agent_mesh/common/utils.py +0 -712
  412. solace_agent_mesh/config_portal/frontend/static/client/assets/_index-a-zJ6rLx.js +0 -46
  413. solace_agent_mesh/config_portal/frontend/static/client/assets/components-ZIfdTbrV.js +0 -191
  414. solace_agent_mesh/config_portal/frontend/static/client/assets/index-BJHAE5s4.js +0 -17
  415. solace_agent_mesh/config_portal/frontend/static/client/assets/manifest-44c41103.js +0 -1
  416. solace_agent_mesh/config_portal/frontend/static/client/assets/root-DX4gQ516.css +0 -1
  417. solace_agent_mesh/configs/agent_global.yaml +0 -74
  418. solace_agent_mesh/configs/agent_image_processing.yaml +0 -82
  419. solace_agent_mesh/configs/agent_slack.yaml +0 -64
  420. solace_agent_mesh/configs/agent_web_request.yaml +0 -75
  421. solace_agent_mesh/configs/conversation_to_file.yaml +0 -56
  422. solace_agent_mesh/configs/error_catcher.yaml +0 -56
  423. solace_agent_mesh/configs/monitor.yaml +0 -0
  424. solace_agent_mesh/configs/monitor_stim_and_errors_to_slack.yaml +0 -109
  425. solace_agent_mesh/configs/monitor_user_feedback.yaml +0 -58
  426. solace_agent_mesh/configs/orchestrator.yaml +0 -241
  427. solace_agent_mesh/configs/service_embedding.yaml +0 -81
  428. solace_agent_mesh/configs/service_llm.yaml +0 -265
  429. solace_agent_mesh/configs/visualize_websocket.yaml +0 -55
  430. solace_agent_mesh/gateway/components/gateway_base.py +0 -47
  431. solace_agent_mesh/gateway/components/gateway_input.py +0 -278
  432. solace_agent_mesh/gateway/components/gateway_output.py +0 -298
  433. solace_agent_mesh/gateway/identity/bamboohr_identity.py +0 -18
  434. solace_agent_mesh/gateway/identity/identity_base.py +0 -10
  435. solace_agent_mesh/gateway/identity/identity_provider.py +0 -60
  436. solace_agent_mesh/gateway/identity/no_identity.py +0 -9
  437. solace_agent_mesh/gateway/identity/passthru_identity.py +0 -9
  438. solace_agent_mesh/monitors/base_monitor_component.py +0 -26
  439. solace_agent_mesh/monitors/feedback/user_feedback_monitor.py +0 -75
  440. solace_agent_mesh/monitors/stim_and_errors/stim_and_error_monitor.py +0 -560
  441. solace_agent_mesh/orchestrator/__init__.py +0 -0
  442. solace_agent_mesh/orchestrator/action_manager.py +0 -237
  443. solace_agent_mesh/orchestrator/components/__init__.py +0 -0
  444. solace_agent_mesh/orchestrator/components/orchestrator_action_manager_timeout_component.py +0 -58
  445. solace_agent_mesh/orchestrator/components/orchestrator_action_response_component.py +0 -179
  446. solace_agent_mesh/orchestrator/components/orchestrator_register_component.py +0 -107
  447. solace_agent_mesh/orchestrator/components/orchestrator_stimulus_processor_component.py +0 -527
  448. solace_agent_mesh/orchestrator/components/orchestrator_streaming_output_component.py +0 -260
  449. solace_agent_mesh/orchestrator/orchestrator_main.py +0 -172
  450. solace_agent_mesh/orchestrator/orchestrator_prompt.py +0 -539
  451. solace_agent_mesh/services/__init__.py +0 -0
  452. solace_agent_mesh/services/authorization/providers/base_authorization_provider.py +0 -56
  453. solace_agent_mesh/services/bamboo_hr_service/__init__.py +0 -3
  454. solace_agent_mesh/services/bamboo_hr_service/bamboo_hr.py +0 -182
  455. solace_agent_mesh/services/common/__init__.py +0 -4
  456. solace_agent_mesh/services/common/auto_expiry.py +0 -45
  457. solace_agent_mesh/services/common/singleton.py +0 -18
  458. solace_agent_mesh/services/file_service/__init__.py +0 -14
  459. solace_agent_mesh/services/file_service/file_manager/__init__.py +0 -0
  460. solace_agent_mesh/services/file_service/file_manager/bucket_file_manager.py +0 -149
  461. solace_agent_mesh/services/file_service/file_manager/file_manager_base.py +0 -162
  462. solace_agent_mesh/services/file_service/file_manager/memory_file_manager.py +0 -64
  463. solace_agent_mesh/services/file_service/file_manager/volume_file_manager.py +0 -106
  464. solace_agent_mesh/services/file_service/file_service.py +0 -437
  465. solace_agent_mesh/services/file_service/file_service_constants.py +0 -54
  466. solace_agent_mesh/services/file_service/file_transformations.py +0 -141
  467. solace_agent_mesh/services/file_service/file_utils.py +0 -324
  468. solace_agent_mesh/services/file_service/transformers/__init__.py +0 -5
  469. solace_agent_mesh/services/history_service/__init__.py +0 -3
  470. solace_agent_mesh/services/history_service/history_providers/__init__.py +0 -0
  471. solace_agent_mesh/services/history_service/history_providers/base_history_provider.py +0 -54
  472. solace_agent_mesh/services/history_service/history_providers/file_history_provider.py +0 -74
  473. solace_agent_mesh/services/history_service/history_providers/index.py +0 -40
  474. solace_agent_mesh/services/history_service/history_providers/memory_history_provider.py +0 -33
  475. solace_agent_mesh/services/history_service/history_providers/mongodb_history_provider.py +0 -66
  476. solace_agent_mesh/services/history_service/history_providers/redis_history_provider.py +0 -66
  477. solace_agent_mesh/services/history_service/history_providers/sql_history_provider.py +0 -93
  478. solace_agent_mesh/services/history_service/history_service.py +0 -413
  479. solace_agent_mesh/services/history_service/long_term_memory/__init__.py +0 -0
  480. solace_agent_mesh/services/history_service/long_term_memory/long_term_memory.py +0 -399
  481. solace_agent_mesh/services/llm_service/components/llm_request_component.py +0 -340
  482. solace_agent_mesh/services/llm_service/components/llm_service_component_base.py +0 -152
  483. solace_agent_mesh/services/middleware_service/__init__.py +0 -0
  484. solace_agent_mesh/services/middleware_service/middleware_service.py +0 -20
  485. solace_agent_mesh/templates/action.py +0 -38
  486. solace_agent_mesh/templates/agent.py +0 -29
  487. solace_agent_mesh/templates/agent.yaml +0 -70
  488. solace_agent_mesh/templates/gateway-config-template.yaml +0 -6
  489. solace_agent_mesh/templates/gateway-default-config.yaml +0 -28
  490. solace_agent_mesh/templates/gateway-flows.yaml +0 -78
  491. solace_agent_mesh/templates/gateway-header.yaml +0 -16
  492. solace_agent_mesh/templates/gateway_base.py +0 -15
  493. solace_agent_mesh/templates/gateway_input.py +0 -98
  494. solace_agent_mesh/templates/gateway_output.py +0 -71
  495. solace_agent_mesh/templates/plugin-gateway-default-config.yaml +0 -29
  496. solace_agent_mesh/templates/plugin-pyproject.toml +0 -30
  497. solace_agent_mesh/templates/rest-api-default-config.yaml +0 -31
  498. solace_agent_mesh/templates/rest-api-flows.yaml +0 -81
  499. solace_agent_mesh/templates/slack-default-config.yaml +0 -16
  500. solace_agent_mesh/templates/slack-flows.yaml +0 -81
  501. solace_agent_mesh/templates/solace-agent-mesh-default.yaml +0 -86
  502. solace_agent_mesh/templates/solace-agent-mesh-plugin-default.yaml +0 -8
  503. solace_agent_mesh/templates/web-default-config.yaml +0 -10
  504. solace_agent_mesh/templates/web-flows.yaml +0 -76
  505. solace_agent_mesh/tools/__init__.py +0 -0
  506. solace_agent_mesh/tools/components/__init__.py +0 -0
  507. solace_agent_mesh/tools/components/conversation_formatter.py +0 -111
  508. solace_agent_mesh/tools/components/file_resolver_component.py +0 -58
  509. solace_agent_mesh/tools/config/runtime_config.py +0 -26
  510. solace_agent_mesh-0.2.4.dist-info/METADATA +0 -176
  511. solace_agent_mesh-0.2.4.dist-info/RECORD +0 -193
  512. solace_agent_mesh-0.2.4.dist-info/entry_points.txt +0 -3
  513. /solace_agent_mesh/{agents → agent}/__init__.py +0 -0
  514. /solace_agent_mesh/{agents/global → agent/adk}/__init__.py +0 -0
  515. /solace_agent_mesh/{agents/global/actions → agent/protocol}/__init__.py +0 -0
  516. /solace_agent_mesh/{agents/image_processing → agent/sac}/__init__.py +0 -0
  517. /solace_agent_mesh/{agents/image_processing/actions → agent/utils}/__init__.py +0 -0
  518. /solace_agent_mesh/{agents/web_request → config_portal/backend/plugin_catalog}/__init__.py +0 -0
  519. /solace_agent_mesh/{agents/web_request/actions → evaluation}/__init__.py +0 -0
  520. /solace_agent_mesh/gateway/{components → http_sse}/__init__.py +0 -0
  521. {solace_agent_mesh-0.2.4.dist-info → solace_agent_mesh-1.0.2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,667 @@
1
+ """
2
+ Refactored evaluator with improved structure and readability.
3
+ This module evaluates AI model performance against test cases using multiple evaluation strategies.
4
+ """
5
+
6
+ import concurrent.futures
7
+ import json
8
+ import os
9
+ import re
10
+ import sys
11
+ from abc import ABC, abstractmethod
12
+ from collections import defaultdict
13
+ from dataclasses import dataclass, field
14
+ from typing import Dict, List, Optional, Any, Tuple
15
+ import logging
16
+
17
+ import numpy as np
18
+ from rouge import Rouge
19
+ import litellm
20
+
21
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
22
+ from evaluation.config_loader import ConfigLoader
23
+ from evaluation.test_case_loader import load_test_case
24
+
25
+ logging.basicConfig(level=logging.INFO)
26
+ logger = logging.getLogger(__name__)
27
+
28
+ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
29
+
30
+
31
@dataclass
class EvaluationResult:
    """Outcome of evaluating one run of one test case.

    Every score field defaults to ``None``, meaning no value was recorded
    for that metric.
    """

    run_number: int
    test_case_id: str
    test_case_path: str
    tool_match_score: Optional[float] = None
    response_match_score: Optional[float] = None
    llm_eval_score: Optional[float] = None
    llm_eval_reasoning: Optional[str] = None
    duration_seconds: Optional[float] = None
    errors: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable mapping of this result.

        The identifying fields and the duration are always present; score
        sections and the error list are emitted only when they hold data.
        """
        payload: Dict[str, Any] = {
            "run": self.run_number,
            "test_case_id": self.test_case_id,
            "test_case_path": self.test_case_path,
            "duration_seconds": self.duration_seconds,
        }

        # Plain numeric scores share the same "emit when set" rule.
        simple_scores = (
            ("tool_match", self.tool_match_score),
            ("response_match", self.response_match_score),
        )
        for key, value in simple_scores:
            if value is not None:
                payload[key] = value

        # The LLM verdict is nested so the reasoning travels with the score.
        if self.llm_eval_score is not None:
            payload["llm_eval"] = {
                "score": self.llm_eval_score,
                "reasoning": self.llm_eval_reasoning,
            }

        if self.errors:
            payload["errors"] = self.errors

        return payload
70
+
71
+
72
@dataclass
class ScoreStatistics:
    """Mean plus a five-number summary (min, q1, q2, q3, max) of scores."""

    average: float
    distribution: Dict[str, float]

    @classmethod
    def from_scores(cls, scores: List[float]) -> "ScoreStatistics":
        """Build the summary for ``scores``.

        An empty list produces an all-zero summary instead of asking numpy
        to reduce an empty sequence.
        """
        if not scores:
            zeroed = dict.fromkeys(("min", "q1", "q2", "q3", "max"), 0.0)
            return cls(average=0.0, distribution=zeroed)

        # One reducer per distribution key, applied in output order.
        reducers = {
            "min": np.min,
            "q1": lambda values: np.percentile(values, 25),
            "q2": np.median,
            "q3": lambda values: np.percentile(values, 75),
            "max": np.max,
        }
        return cls(
            average=float(np.mean(scores)),
            distribution={
                label: float(reduce(scores)) for label, reduce in reducers.items()
            },
        )
98
+
99
+
100
@dataclass
class TestCaseResults:
    """All runs of one test case together with their aggregated statistics."""

    test_case_id: str
    category: str
    runs: List[EvaluationResult]
    average_duration: float
    tool_match_scores: ScoreStatistics
    response_match_scores: ScoreStatistics
    llm_eval_scores: ScoreStatistics

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable mapping of the aggregated results."""

        def summarize(stats: ScoreStatistics) -> Dict[str, Any]:
            # Every score family is serialized with the same two keys.
            return {"average": stats.average, "distribution": stats.distribution}

        serialized: Dict[str, Any] = {
            "test_case_id": self.test_case_id,
            "category": self.category,
            "runs": [single_run.to_dict() for single_run in self.runs],
            "average_duration": self.average_duration,
        }
        serialized["tool_match_scores"] = summarize(self.tool_match_scores)
        serialized["response_match_scores"] = summarize(self.response_match_scores)
        serialized["llm_eval_scores"] = summarize(self.llm_eval_scores)
        return serialized
132
+
133
+
134
@dataclass
class ModelResults:
    """Evaluation results of every test case for one model."""

    model_name: str
    total_execution_time: Optional[float]
    test_cases: List[TestCaseResults]

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable mapping of the model's results."""
        serialized: Dict[str, Any] = {
            "model_name": self.model_name,
            "total_execution_time": self.total_execution_time,
        }
        serialized["test_cases"] = [case.to_dict() for case in self.test_cases]
        return serialized
149
+
150
+
151
class ConfigurationService:
    """Caching front end over ``ConfigLoader``.

    The configuration and the evaluation settings are each requested from
    the loader on first access and served from memory afterwards.
    """

    def __init__(self, config_path: str):
        self.config_loader = ConfigLoader(config_path)
        # Both caches start empty and are filled lazily by the getters.
        self._config_cache = None
        self._evaluation_settings_cache = None

    def get_config(self) -> Dict[str, Any]:
        """Return the main configuration, loading it on first use."""
        cached = self._config_cache
        if cached is None:
            cached = self.config_loader.load_config()
            self._config_cache = cached
        return cached

    def get_evaluation_settings(self) -> Dict[str, Any]:
        """Return the evaluation settings, loading them on first use."""
        cached = self._evaluation_settings_cache
        if cached is None:
            cached = self.config_loader.get_evaluation_settings()
            self._evaluation_settings_cache = cached
        return cached

    def get_results_path(self) -> str:
        """Return ``<script dir>/results/<results_dir_name>``."""
        dir_name = self.get_config()["results_dir_name"]
        return os.path.join(SCRIPT_DIR, "results", dir_name)
178
+
179
+
180
class FileService:
    """Handles file I/O operations (JSON load/save and existence checks)."""

    @staticmethod
    def load_json(filepath: str) -> Any:
        """Load and return the JSON document stored at ``filepath``.

        Raises:
            FileNotFoundError: if the file does not exist.
            json.JSONDecodeError: if the file is not valid JSON.

        Both errors are logged before being re-raised.
        """
        try:
            # JSON interchange is UTF-8; do not depend on the locale default.
            with open(filepath, "r", encoding="utf-8") as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            logger.error(f"Failed to load JSON from {filepath}: {e}")
            raise

    @staticmethod
    def save_json(data: Any, filepath: str):
        """Serialize ``data`` as indented JSON to ``filepath``.

        Missing parent directories are created. Any failure is logged and
        re-raised.
        """
        try:
            parent_dir = os.path.dirname(filepath)
            # A bare filename has an empty dirname, and os.makedirs("")
            # raises, so only create directories when there is one to create.
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=4)
        except Exception as e:
            logger.error(f"Failed to save JSON to {filepath}: {e}")
            raise

    @staticmethod
    def file_exists(filepath: str) -> bool:
        """Return True when ``filepath`` exists on disk."""
        return os.path.exists(filepath)
208
+
209
+
210
class StatisticsService:
    """Stateless helpers for aggregating scores and durations."""

    @staticmethod
    def calculate_score_statistics(scores: List[float]) -> ScoreStatistics:
        """Summarize ``scores`` by delegating to ``ScoreStatistics.from_scores``."""
        summary = ScoreStatistics.from_scores(scores)
        return summary

    @staticmethod
    def calculate_average_duration(durations: List[float]) -> float:
        """Return the mean of ``durations``, or 0.0 when the list is empty."""
        return float(np.mean(durations)) if durations else 0.0
224
+
225
+
226
class EvaluationStrategy(ABC):
    """Interface that every evaluation strategy implements."""

    @abstractmethod
    def evaluate(
        self, test_case: Dict[str, Any], summary_data: Dict[str, Any]
    ) -> Optional[float]:
        """Score one run of ``test_case`` from its ``summary_data``.

        Concrete strategies must override this method.
        """
235
+
236
+
237
class ToolMatchEvaluator(EvaluationStrategy):
    """Scores a run by how many of the expected tools were actually called."""

    def evaluate(
        self, test_case: Dict[str, Any], summary_data: Dict[str, Any]
    ) -> Optional[float]:
        """Return the fraction of expected tools present among the tool calls.

        Returns 1.0 when no tools are expected, and ``None`` (after logging a
        warning) when the test case or summary lacks the required keys.
        Extra, unexpected tool calls do not lower the score.
        """
        try:
            expected_tools = test_case["evaluation"]["expected_tools"]
            called_names = [
                call["tool_name"] for call in summary_data.get("tool_calls", [])
            ]
            wanted, used = set(expected_tools), set(called_names)
        except (KeyError, TypeError) as e:
            logger.warning(f"Error in tool match evaluation: {e}")
            return None

        # Nothing expected means nothing can be missing.
        if not wanted:
            return 1.0

        return len(wanted & used) / len(wanted)
262
+
263
+
264
class ResponseMatchEvaluator(EvaluationStrategy):
    """Scores a response against the expected one with ROUGE f-scores."""

    def __init__(self):
        self.rouge = Rouge()

    def evaluate(
        self, test_case: Dict[str, Any], summary_data: Dict[str, Any]
    ) -> Optional[float]:
        """Return a weighted blend of the ROUGE-1, ROUGE-2 and ROUGE-L f-scores.

        Returns 0.0 when either response is empty, or when scoring raises
        ``ValueError``, ``KeyError`` or ``TypeError`` (logged as a warning).
        """
        # (metric name, weight) pairs; combined in this order.
        metric_weights = (("rouge-1", 0.2), ("rouge-2", 0.3), ("rouge-l", 0.5))

        try:
            reference = test_case["evaluation"]["expected_response"]
            candidate = summary_data.get("final_message", "")

            if not (candidate and reference):
                return 0.0

            metrics = self.rouge.get_scores(candidate, reference)[0]

            blended = sum(
                weight * metrics.get(metric, {}).get("f", 0.0)
                for metric, weight in metric_weights
            )
            return blended

        except (ValueError, KeyError, TypeError) as e:
            logger.warning(f"Error in response match evaluation: {e}")
            return 0.0
295
+
296
+
297
class LLMEvaluator(EvaluationStrategy):
    """Evaluates responses using an LLM judge.

    The judge is asked to reply with a ``Score:`` line and a ``Reasoning:``
    section; both are parsed out of the free-text completion.
    """

    # Explicit "Score: 0.8" line, as requested by the prompt.
    _SCORE_LINE_RE = re.compile(r"Score:\s*([0-9]*\.?[0-9]+)", re.IGNORECASE)
    # Stand-alone number: not glued to a word or to a larger dotted number,
    # so "2.05" is read as 2.05 (never "05") and "1.2.3" is ignored.
    _NUMBER_RE = re.compile(r"(?<![\w.])([0-9]*\.?[0-9]+)(?!\w|\.[0-9])")
    # Everything after "Reasoning:", across newlines.
    _REASONING_RE = re.compile(r"Reasoning:\s*(.+)", re.IGNORECASE | re.DOTALL)

    def __init__(self, llm_config: Dict[str, Any]):
        """Read the judge model settings from ``llm_config``.

        Raises:
            ValueError: if the model name, API key or endpoint is missing
                or empty.
        """
        self.model = llm_config.get("LLM_SERVICE_PLANNING_MODEL_NAME")
        self.api_key = llm_config.get("LLM_SERVICE_API_KEY")
        self.api_base = llm_config.get("LLM_SERVICE_ENDPOINT")

        if not all([self.model, self.api_key, self.api_base]):
            raise ValueError(
                "LLM evaluator requires model, api_key, and api_base configuration"
            )

    def evaluate(
        self, test_case: Dict[str, Any], summary_data: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Ask the LLM judge to grade the run.

        Returns:
            ``{"score": float, "reasoning": str}``, or ``None`` when anything
            fails (missing test-case keys, completion error, empty reply).
            The failure is logged.
        """
        try:
            query = test_case["query"]
            expected_response = test_case["evaluation"]["expected_response"]
            actual_response = summary_data.get("final_message", "")
            criterion = test_case["evaluation"]["criterion"]
            input_artifacts = summary_data.get("input_artifacts", [])
            output_artifacts = summary_data.get("output_artifacts", [])

            prompt = self._build_evaluation_prompt(
                query,
                expected_response,
                actual_response,
                criterion,
                input_artifacts,
                output_artifacts,
            )

            response = litellm.completion(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                api_key=self.api_key,
                base_url=self.api_base,
            )

            response_content = response.choices[0].message.content.strip()
            score = self._extract_score(response_content)
            reasoning = self._extract_reasoning(response_content)

            return {"score": score, "reasoning": reasoning}

        except Exception as e:
            # Best-effort: one failed judgement must not abort the whole run.
            logger.error(f"Error in LLM evaluation: {e}")
            return None

    def _build_evaluation_prompt(
        self,
        query: str,
        expected_response: str,
        actual_response: str,
        criterion: str,
        input_artifacts: List[Dict],
        output_artifacts: List[Dict],
    ) -> str:
        """Build the evaluation prompt for the LLM."""
        return f"""
Original Query: {query}
Expected Response: {expected_response}
Actual Response: {actual_response}
Criterion: {criterion}
Input Artifacts: {input_artifacts}
Output Artifacts: {output_artifacts}

Based on the criterion, please evaluate the actual response.
Format your response exactly as:
Score: [0.0-1.0]
Reasoning: [Your detailed explanation of why you gave this score, considering both the response and any artifacts created]

Provide a score from 0.0 to 1.0 where:
- 1.0 = Excellent: Fully meets the criterion and expectations
- 0.8-0.9 = Good: Mostly meets the criterion with minor issues
- 0.6-0.7 = Adequate: Partially meets the criterion but has notable gaps
- 0.4-0.5 = Poor: Minimally meets the criterion with significant issues
- 0.0-0.3 = Very Poor: Fails to meet the criterion
"""

    def _extract_score(self, llm_response: str) -> float:
        """Extract the numerical score from the LLM response.

        An explicit ``Score: X`` line wins and is clamped to ``[0.0, 1.0]``.
        Otherwise the first stand-alone number already inside ``[0.0, 1.0]``
        is used. Returns 0.0 when no usable number is found.
        """
        score_match = self._SCORE_LINE_RE.search(llm_response)
        if score_match:
            # The pattern only admits text that float() can parse.
            return max(0.0, min(1.0, float(score_match.group(1))))

        # Fallback: scan every number instead of giving up on the first one,
        # so an incidental "10" earlier in the text does not hide a later 0.7.
        for number_match in self._NUMBER_RE.finditer(llm_response):
            candidate = float(number_match.group(1))
            if 0.0 <= candidate <= 1.0:
                return candidate

        return 0.0

    def _extract_reasoning(self, llm_response: str) -> str:
        """Return the text after ``Reasoning:``, or the whole reply if absent."""
        reasoning_match = self._REASONING_RE.search(llm_response)
        if reasoning_match:
            return reasoning_match.group(1).strip()

        return llm_response.strip()
413
+
414
+
415
class RunEvaluator:
    """Applies the enabled evaluation strategies to a single test run."""

    def __init__(self, evaluation_settings: Dict[str, Any]):
        self.evaluation_settings = evaluation_settings
        self.file_service = FileService()

        # A strategy left as None is skipped by evaluate_run.
        self.tool_evaluator = None
        if evaluation_settings["tool_match"]["enabled"]:
            self.tool_evaluator = ToolMatchEvaluator()

        self.response_evaluator = None
        if evaluation_settings["response_match"]["enabled"]:
            self.response_evaluator = ResponseMatchEvaluator()

        # The LLM judge is optional even when enabled: a construction failure
        # is logged and the judge is left disabled.
        self.llm_evaluator = None
        if evaluation_settings["llm_evaluator"]["enabled"]:
            try:
                self.llm_evaluator = LLMEvaluator(
                    evaluation_settings["llm_evaluator"]["env"]
                )
            except Exception as e:
                logger.error(f"Failed to initialize LLM evaluator: {e}")

    def evaluate_run(
        self,
        run_number: int,
        run_path: str,
        test_case: Dict[str, Any],
        test_case_path: str,
    ) -> Optional[EvaluationResult]:
        """Evaluate the run stored under ``run_path``.

        Returns ``None`` when ``summary.json`` is missing or cannot be loaded;
        otherwise an ``EvaluationResult`` holding the scores of every enabled
        strategy.
        """
        logger.info(
            f" - Evaluating run {run_number} for test case {test_case['test_case_id']}"
        )

        summary_file = os.path.join(run_path, "summary.json")
        if not self.file_service.file_exists(summary_file):
            logger.warning(
                f" Summary file not found for run {run_number}, skipping."
            )
            return None

        try:
            summary = self.file_service.load_json(summary_file)
        except Exception as e:
            logger.error(f" Error loading summary.json for run {run_number}: {e}")
            return None

        outcome = EvaluationResult(
            run_number=run_number,
            test_case_id=test_case["test_case_id"],
            test_case_path=test_case_path,
            duration_seconds=summary.get("duration_seconds"),
        )

        if self.tool_evaluator:
            outcome.tool_match_score = self.tool_evaluator.evaluate(test_case, summary)

        if self.response_evaluator:
            outcome.response_match_score = self.response_evaluator.evaluate(
                test_case, summary
            )

        if self.llm_evaluator:
            verdict = self.llm_evaluator.evaluate(test_case, summary)
            # A falsy verdict leaves both LLM fields at their None defaults.
            if verdict:
                outcome.llm_eval_score = verdict["score"]
                outcome.llm_eval_reasoning = verdict["reasoning"]

        return outcome
494
+
495
+
496
class ModelEvaluator:
    """Evaluates all runs for a single model."""

    def __init__(self, config: Dict[str, Any], evaluation_settings: Dict[str, Any]):
        self.config = config
        self.evaluation_settings = evaluation_settings
        self.run_evaluator = RunEvaluator(evaluation_settings)
        self.statistics_service = StatisticsService()

    def evaluate_model(self, model_name: str, base_results_path: str) -> ModelResults:
        """Evaluate every configured test case and run for ``model_name``.

        Runs are evaluated concurrently in a thread pool. The returned
        ``ModelResults`` lists test cases in configuration order and the runs
        of each test case in ascending run number, so repeated invocations
        give identically ordered output regardless of thread scheduling.
        ``total_execution_time`` is left as ``None`` for the caller to fill.
        """
        logger.info(f"Evaluating model: {model_name}")

        model_results_path = os.path.join(base_results_path, model_name)

        # Collect all evaluation tasks
        tasks = self._collect_evaluation_tasks(model_results_path)

        # Run evaluations in parallel
        model_results_data = defaultdict(list)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(self.run_evaluator.evaluate_run, *task)
                for task in tasks
            ]

            for future in concurrent.futures.as_completed(futures):
                try:
                    result = future.result()
                except Exception as e:
                    logger.error(f"An error occurred during evaluation: {e}")
                    continue
                if result:
                    model_results_data[result.test_case_id].append(result)

        # Futures complete in arbitrary order, so restore a stable one:
        # test cases as configured, runs by run number.
        ordered_case_ids = list(
            dict.fromkeys(task[2]["test_case_id"] for task in tasks)
        )

        test_case_results = []
        for test_case_id in ordered_case_ids:
            runs = sorted(
                model_results_data.get(test_case_id, []),
                key=lambda run: run.run_number,
            )
            if runs:
                test_case_results.append(
                    self._aggregate_test_case_results(test_case_id, runs)
                )

        return ModelResults(
            model_name=model_name,
            total_execution_time=None,  # Will be set by orchestrator
            test_cases=test_case_results,
        )

    def _collect_evaluation_tasks(
        self, model_results_path: str
    ) -> List[Tuple[int, str, Dict[str, Any], str]]:
        """Build one ``(run_number, run_path, test_case, test_case_path)`` per run.

        Run directories are expected at
        ``<model_results_path>/<test_case_id>/run_<n>`` for ``n`` from 1 to
        the configured number of runs.
        """
        tasks = []

        for test_case_path in self.config["test_cases"]:
            test_case = load_test_case(test_case_path)
            test_case_id = test_case["test_case_id"]
            test_case_results_path = os.path.join(model_results_path, test_case_id)

            for i in range(1, self.config["runs"] + 1):
                run_path = os.path.join(test_case_results_path, f"run_{i}")
                tasks.append((i, run_path, test_case, test_case_path))

        return tasks

    def _aggregate_test_case_results(
        self, test_case_id: str, runs: List[EvaluationResult]
    ) -> TestCaseResults:
        """Aggregate results for a test case across multiple runs.

        ``runs`` must be non-empty: the test case is reloaded from the path
        recorded on the first run to obtain its category.
        """
        test_case_path = runs[0].test_case_path
        test_case = load_test_case(test_case_path)

        # Only recorded (non-None) values take part in the statistics.
        tool_scores = [
            r.tool_match_score for r in runs if r.tool_match_score is not None
        ]
        response_scores = [
            r.response_match_score for r in runs if r.response_match_score is not None
        ]
        llm_scores = [r.llm_eval_score for r in runs if r.llm_eval_score is not None]
        duration_scores = [
            r.duration_seconds for r in runs if r.duration_seconds is not None
        ]

        return TestCaseResults(
            test_case_id=test_case_id,
            category=test_case["category"],
            runs=runs,
            average_duration=self.statistics_service.calculate_average_duration(
                duration_scores
            ),
            tool_match_scores=self.statistics_service.calculate_score_statistics(
                tool_scores
            ),
            response_match_scores=self.statistics_service.calculate_score_statistics(
                response_scores
            ),
            llm_eval_scores=self.statistics_service.calculate_score_statistics(
                llm_scores
            ),
        )
597
+
598
+
599
class ResultsWriter:
    """Persists evaluation results as JSON files."""

    def __init__(self):
        self.file_service = FileService()

    def write_model_results(self, model_results: ModelResults, base_results_path: str):
        """Write ``model_results`` to ``<base>/<model name>/results.json``."""
        path_parts = (base_results_path, model_results.model_name, "results.json")
        results_path = os.path.join(*path_parts)

        serialized = model_results.to_dict()
        self.file_service.save_json(serialized, results_path)

        logger.info(
            f"Results for model {model_results.model_name} written to {results_path}"
        )
614
+
615
+
616
class EvaluationOrchestrator:
    """Drives the evaluation: load config, evaluate each model, write results."""

    def __init__(self, config_path: str):
        self.config_service = ConfigurationService(config_path)
        self.results_writer = ResultsWriter()

    def run_evaluation(
        self,
        base_results_path: str,
        model_execution_times: Optional[Dict[str, float]] = None,
    ):
        """Evaluate every configured model and write one results file each.

        Args:
            base_results_path: Directory holding one sub-directory per model.
            model_execution_times: Optional mapping of model name to total
                execution time; matching entries are copied onto the results.
        """
        logger.info("--- Starting evaluation ---")

        execution_times = (
            {} if model_execution_times is None else model_execution_times
        )

        config = self.config_service.get_config()
        settings = self.config_service.get_evaluation_settings()

        # One evaluator instance is reused across all models.
        evaluator = ModelEvaluator(config, settings)

        for model_entry in config["llm_models"]:
            name = model_entry["name"]
            results = evaluator.evaluate_model(name, base_results_path)

            # Keep the evaluator's default unless a time was recorded.
            recorded_time = execution_times.get(name)
            if recorded_time is not None:
                results.total_execution_time = recorded_time

            self.results_writer.write_model_results(results, base_results_path)

        logger.info("--- Evaluation finished ---")
656
+
657
+
658
def main(config_path: str = "evaluation/test_suite_config.json"):
    """Run the full evaluation for the suite described by ``config_path``.

    Results are read from and written under the results directory that the
    configuration names.
    """
    orchestrator = EvaluationOrchestrator(config_path)
    orchestrator.run_evaluation(orchestrator.config_service.get_results_path())


if __name__ == "__main__":
    # This will be updated later to parse CLI args.
    main()