solace-agent-mesh: 0.2.3 (py3-none-any.whl) → 1.0.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of solace-agent-mesh might be problematic. Click here for more details.

Files changed (518)
  1. solace_agent_mesh/agent/adk/adk_llm.txt +93 -0
  2. solace_agent_mesh/agent/adk/app_llm_agent.py +26 -0
  3. solace_agent_mesh/agent/adk/callbacks.py +1694 -0
  4. solace_agent_mesh/agent/adk/filesystem_artifact_service.py +381 -0
  5. solace_agent_mesh/agent/adk/invocation_monitor.py +295 -0
  6. solace_agent_mesh/agent/adk/models/lite_llm.py +872 -0
  7. solace_agent_mesh/agent/adk/models/models_llm.txt +94 -0
  8. solace_agent_mesh/agent/adk/runner.py +353 -0
  9. solace_agent_mesh/agent/adk/services.py +240 -0
  10. solace_agent_mesh/agent/adk/setup.py +751 -0
  11. solace_agent_mesh/agent/adk/stream_parser.py +214 -0
  12. solace_agent_mesh/agent/adk/tool_wrapper.py +139 -0
  13. solace_agent_mesh/agent/agent_llm.txt +41 -0
  14. solace_agent_mesh/agent/protocol/event_handlers.py +1469 -0
  15. solace_agent_mesh/agent/protocol/protocol_llm.txt +21 -0
  16. solace_agent_mesh/agent/sac/app.py +640 -0
  17. solace_agent_mesh/agent/sac/component.py +3388 -0
  18. solace_agent_mesh/agent/sac/patch_adk.py +111 -0
  19. solace_agent_mesh/agent/sac/sac_llm.txt +105 -0
  20. solace_agent_mesh/agent/sac/task_execution_context.py +176 -0
  21. solace_agent_mesh/agent/testing/__init__.py +3 -0
  22. solace_agent_mesh/agent/testing/debug_utils.py +135 -0
  23. solace_agent_mesh/agent/testing/testing_llm.txt +90 -0
  24. solace_agent_mesh/agent/tools/__init__.py +14 -0
  25. solace_agent_mesh/agent/tools/audio_tools.py +1622 -0
  26. solace_agent_mesh/agent/tools/builtin_artifact_tools.py +1954 -0
  27. solace_agent_mesh/agent/tools/builtin_data_analysis_tools.py +238 -0
  28. solace_agent_mesh/agent/tools/general_agent_tools.py +569 -0
  29. solace_agent_mesh/agent/tools/image_tools.py +1184 -0
  30. solace_agent_mesh/agent/tools/peer_agent_tool.py +289 -0
  31. solace_agent_mesh/agent/tools/registry.py +36 -0
  32. solace_agent_mesh/agent/tools/test_tools.py +135 -0
  33. solace_agent_mesh/agent/tools/tool_definition.py +45 -0
  34. solace_agent_mesh/agent/tools/tools_llm.txt +104 -0
  35. solace_agent_mesh/agent/tools/web_tools.py +381 -0
  36. solace_agent_mesh/agent/utils/artifact_helpers.py +927 -0
  37. solace_agent_mesh/agent/utils/config_parser.py +47 -0
  38. solace_agent_mesh/agent/utils/context_helpers.py +60 -0
  39. solace_agent_mesh/agent/utils/utils_llm.txt +153 -0
  40. solace_agent_mesh/assets/docs/404.html +16 -0
  41. solace_agent_mesh/assets/docs/assets/css/styles.906a1503.css +1 -0
  42. solace_agent_mesh/assets/docs/assets/images/Solace_AI_Framework_With_Broker-85f0a306a9bcdd20b390b7a949f6d862.png +0 -0
  43. solace_agent_mesh/assets/docs/assets/images/sac-flows-80d5b603c6aafd33e87945680ce0abf3.png +0 -0
  44. solace_agent_mesh/assets/docs/assets/images/sac_parts_of_a_component-cb3d0424b1d0c17734c5435cca6b4082.png +0 -0
  45. solace_agent_mesh/assets/docs/assets/js/04989206.674a8007.js +1 -0
  46. solace_agent_mesh/assets/docs/assets/js/0e682baa.79f0ab22.js +1 -0
  47. solace_agent_mesh/assets/docs/assets/js/1001.0182a8bd.js +1 -0
  48. solace_agent_mesh/assets/docs/assets/js/1023fc19.015679ca.js +1 -0
  49. solace_agent_mesh/assets/docs/assets/js/1039.0bd46aa1.js +1 -0
  50. solace_agent_mesh/assets/docs/assets/js/149.b797a808.js +1 -0
  51. solace_agent_mesh/assets/docs/assets/js/1523c6b4.91c7bc01.js +1 -0
  52. solace_agent_mesh/assets/docs/assets/js/165.6a39807d.js +2 -0
  53. solace_agent_mesh/assets/docs/assets/js/165.6a39807d.js.LICENSE.txt +9 -0
  54. solace_agent_mesh/assets/docs/assets/js/166ab619.7d97ccaf.js +1 -0
  55. solace_agent_mesh/assets/docs/assets/js/17896441.a5e82f9b.js +2 -0
  56. solace_agent_mesh/assets/docs/assets/js/17896441.a5e82f9b.js.LICENSE.txt +7 -0
  57. solace_agent_mesh/assets/docs/assets/js/1c6e87d2.23bccffb.js +1 -0
  58. solace_agent_mesh/assets/docs/assets/js/2130.ab9fd314.js +1 -0
  59. solace_agent_mesh/assets/docs/assets/js/21ceee5f.614fa8dd.js +1 -0
  60. solace_agent_mesh/assets/docs/assets/js/2237.5e477fc6.js +1 -0
  61. solace_agent_mesh/assets/docs/assets/js/2334.622a6395.js +1 -0
  62. solace_agent_mesh/assets/docs/assets/js/2a9cab12.8909df92.js +1 -0
  63. solace_agent_mesh/assets/docs/assets/js/3219.adc1d663.js +1 -0
  64. solace_agent_mesh/assets/docs/assets/js/332e10b5.7a103f42.js +1 -0
  65. solace_agent_mesh/assets/docs/assets/js/3624.b524e433.js +1 -0
  66. solace_agent_mesh/assets/docs/assets/js/375.708d48db.js +1 -0
  67. solace_agent_mesh/assets/docs/assets/js/3834.b6cd790e.js +1 -0
  68. solace_agent_mesh/assets/docs/assets/js/3d406171.f722eaf5.js +1 -0
  69. solace_agent_mesh/assets/docs/assets/js/4250.95455b28.js +1 -0
  70. solace_agent_mesh/assets/docs/assets/js/42b3f8d8.36090198.js +1 -0
  71. solace_agent_mesh/assets/docs/assets/js/4356.d169ab5b.js +1 -0
  72. solace_agent_mesh/assets/docs/assets/js/442a8107.5ba94b65.js +1 -0
  73. solace_agent_mesh/assets/docs/assets/js/4458.518e66fa.js +1 -0
  74. solace_agent_mesh/assets/docs/assets/js/4488.c7cc3442.js +1 -0
  75. solace_agent_mesh/assets/docs/assets/js/4494.6ee23046.js +1 -0
  76. solace_agent_mesh/assets/docs/assets/js/4855.fc4444b6.js +1 -0
  77. solace_agent_mesh/assets/docs/assets/js/4866.22daefc0.js +1 -0
  78. solace_agent_mesh/assets/docs/assets/js/4950.ca4caeda.js +1 -0
  79. solace_agent_mesh/assets/docs/assets/js/4c2787c2.66ee00e9.js +1 -0
  80. solace_agent_mesh/assets/docs/assets/js/5388.7a136447.js +1 -0
  81. solace_agent_mesh/assets/docs/assets/js/55f47984.c484bf96.js +1 -0
  82. solace_agent_mesh/assets/docs/assets/js/5607.081356f8.js +1 -0
  83. solace_agent_mesh/assets/docs/assets/js/5864.b0d0e9de.js +1 -0
  84. solace_agent_mesh/assets/docs/assets/js/5b4258a4.bda20761.js +1 -0
  85. solace_agent_mesh/assets/docs/assets/js/5e95c892.558d5167.js +1 -0
  86. solace_agent_mesh/assets/docs/assets/js/6143.0a1464c9.js +1 -0
  87. solace_agent_mesh/assets/docs/assets/js/6395.e9c73649.js +1 -0
  88. solace_agent_mesh/assets/docs/assets/js/6796.51d2c9b7.js +1 -0
  89. solace_agent_mesh/assets/docs/assets/js/6976.379be23b.js +1 -0
  90. solace_agent_mesh/assets/docs/assets/js/6978.ee0b945c.js +1 -0
  91. solace_agent_mesh/assets/docs/assets/js/7040.cb436723.js +1 -0
  92. solace_agent_mesh/assets/docs/assets/js/7195.412f418a.js +1 -0
  93. solace_agent_mesh/assets/docs/assets/js/7280.3fb73bdb.js +1 -0
  94. solace_agent_mesh/assets/docs/assets/js/768e31b0.a12673db.js +1 -0
  95. solace_agent_mesh/assets/docs/assets/js/7845.e33e7c4c.js +1 -0
  96. solace_agent_mesh/assets/docs/assets/js/7900.69516146.js +1 -0
  97. solace_agent_mesh/assets/docs/assets/js/8356.8a379c04.js +1 -0
  98. solace_agent_mesh/assets/docs/assets/js/85387663.6bf41934.js +1 -0
  99. solace_agent_mesh/assets/docs/assets/js/8567.4732c6b7.js +1 -0
  100. solace_agent_mesh/assets/docs/assets/js/8573.cb04eda5.js +1 -0
  101. solace_agent_mesh/assets/docs/assets/js/8577.1d54e766.js +1 -0
  102. solace_agent_mesh/assets/docs/assets/js/8591.d7c16be6.js +2 -0
  103. solace_agent_mesh/assets/docs/assets/js/8591.d7c16be6.js.LICENSE.txt +61 -0
  104. solace_agent_mesh/assets/docs/assets/js/8709.7ecd4047.js +1 -0
  105. solace_agent_mesh/assets/docs/assets/js/8731.49e930c2.js +1 -0
  106. solace_agent_mesh/assets/docs/assets/js/8908.f9d1b506.js +1 -0
  107. solace_agent_mesh/assets/docs/assets/js/9157.b4093d07.js +1 -0
  108. solace_agent_mesh/assets/docs/assets/js/9278.a4fd875d.js +1 -0
  109. solace_agent_mesh/assets/docs/assets/js/945fb41e.74d728aa.js +1 -0
  110. solace_agent_mesh/assets/docs/assets/js/9616.b75c2f6d.js +1 -0
  111. solace_agent_mesh/assets/docs/assets/js/9793.c6d16376.js +1 -0
  112. solace_agent_mesh/assets/docs/assets/js/9eff14a2.1bf8f61c.js +1 -0
  113. solace_agent_mesh/assets/docs/assets/js/a3a92b25.26ca071f.js +1 -0
  114. solace_agent_mesh/assets/docs/assets/js/a7bd4aaa.2204d2f7.js +1 -0
  115. solace_agent_mesh/assets/docs/assets/js/a94703ab.0438dbc2.js +1 -0
  116. solace_agent_mesh/assets/docs/assets/js/aba21aa0.c42a534c.js +1 -0
  117. solace_agent_mesh/assets/docs/assets/js/aba87c2f.d3e2dcc3.js +1 -0
  118. solace_agent_mesh/assets/docs/assets/js/ae4415af.8e279b5d.js +1 -0
  119. solace_agent_mesh/assets/docs/assets/js/b7006a3a.40b10c9d.js +1 -0
  120. solace_agent_mesh/assets/docs/assets/js/bac0be12.f50d9bac.js +1 -0
  121. solace_agent_mesh/assets/docs/assets/js/bb2ef573.207e6990.js +1 -0
  122. solace_agent_mesh/assets/docs/assets/js/c2c06897.63b76e9e.js +1 -0
  123. solace_agent_mesh/assets/docs/assets/js/cc969b05.954186d4.js +1 -0
  124. solace_agent_mesh/assets/docs/assets/js/cd3d4052.ca6eed8c.js +1 -0
  125. solace_agent_mesh/assets/docs/assets/js/ced92a13.fb92e7ca.js +1 -0
  126. solace_agent_mesh/assets/docs/assets/js/cee5d587.f5b73ca1.js +1 -0
  127. solace_agent_mesh/assets/docs/assets/js/f284c35a.ecc3d195.js +1 -0
  128. solace_agent_mesh/assets/docs/assets/js/f897a61a.2c2e152c.js +1 -0
  129. solace_agent_mesh/assets/docs/assets/js/fbfa3e75.aca209c9.js +1 -0
  130. solace_agent_mesh/assets/docs/assets/js/main.7ed3319f.js +2 -0
  131. solace_agent_mesh/assets/docs/assets/js/main.7ed3319f.js.LICENSE.txt +81 -0
  132. solace_agent_mesh/assets/docs/assets/js/runtime~main.d9520ae2.js +1 -0
  133. solace_agent_mesh/assets/docs/docs/documentation/concepts/agents/index.html +128 -0
  134. solace_agent_mesh/assets/docs/docs/documentation/concepts/architecture/index.html +91 -0
  135. solace_agent_mesh/assets/docs/docs/documentation/concepts/cli/index.html +201 -0
  136. solace_agent_mesh/assets/docs/docs/documentation/concepts/gateways/index.html +91 -0
  137. solace_agent_mesh/assets/docs/docs/documentation/concepts/orchestrator/index.html +55 -0
  138. solace_agent_mesh/assets/docs/docs/documentation/concepts/plugins/index.html +82 -0
  139. solace_agent_mesh/assets/docs/docs/documentation/deployment/debugging/index.html +60 -0
  140. solace_agent_mesh/assets/docs/docs/documentation/deployment/deploy/index.html +48 -0
  141. solace_agent_mesh/assets/docs/docs/documentation/deployment/observability/index.html +54 -0
  142. solace_agent_mesh/assets/docs/docs/documentation/enterprise/index.html +17 -0
  143. solace_agent_mesh/assets/docs/docs/documentation/getting-started/component-overview/index.html +45 -0
  144. solace_agent_mesh/assets/docs/docs/documentation/getting-started/installation/index.html +76 -0
  145. solace_agent_mesh/assets/docs/docs/documentation/getting-started/introduction/index.html +150 -0
  146. solace_agent_mesh/assets/docs/docs/documentation/getting-started/quick-start/index.html +54 -0
  147. solace_agent_mesh/assets/docs/docs/documentation/tutorials/bedrock-agents/index.html +267 -0
  148. solace_agent_mesh/assets/docs/docs/documentation/tutorials/custom-agent/index.html +136 -0
  149. solace_agent_mesh/assets/docs/docs/documentation/tutorials/event-mesh-gateway/index.html +116 -0
  150. solace_agent_mesh/assets/docs/docs/documentation/tutorials/mcp-integration/index.html +80 -0
  151. solace_agent_mesh/assets/docs/docs/documentation/tutorials/mongodb-integration/index.html +164 -0
  152. solace_agent_mesh/assets/docs/docs/documentation/tutorials/rest-gateway/index.html +57 -0
  153. solace_agent_mesh/assets/docs/docs/documentation/tutorials/slack-integration/index.html +72 -0
  154. solace_agent_mesh/assets/docs/docs/documentation/tutorials/sql-database/index.html +102 -0
  155. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/artifact-management/index.html +99 -0
  156. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/audio-tools/index.html +90 -0
  157. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/data-analysis-tools/index.html +107 -0
  158. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/embeds/index.html +152 -0
  159. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/index.html +103 -0
  160. solace_agent_mesh/assets/docs/docs/documentation/user-guide/create-agents/index.html +170 -0
  161. solace_agent_mesh/assets/docs/docs/documentation/user-guide/create-gateways/index.html +200 -0
  162. solace_agent_mesh/assets/docs/docs/documentation/user-guide/creating-service-providers/index.html +54 -0
  163. solace_agent_mesh/assets/docs/docs/documentation/user-guide/solace-ai-connector/index.html +69 -0
  164. solace_agent_mesh/assets/docs/docs/documentation/user-guide/structure/index.html +59 -0
  165. solace_agent_mesh/assets/docs/img/Solace_AI_Framework_README.png +0 -0
  166. solace_agent_mesh/assets/docs/img/Solace_AI_Framework_With_Broker.png +0 -0
  167. solace_agent_mesh/assets/docs/img/logo.png +0 -0
  168. solace_agent_mesh/assets/docs/img/sac-flows.png +0 -0
  169. solace_agent_mesh/assets/docs/img/sac_parts_of_a_component.png +0 -0
  170. solace_agent_mesh/assets/docs/img/solace-logo.png +0 -0
  171. solace_agent_mesh/assets/docs/lunr-index-1753813536522.json +1 -0
  172. solace_agent_mesh/assets/docs/lunr-index.json +1 -0
  173. solace_agent_mesh/assets/docs/search-doc-1753813536522.json +1 -0
  174. solace_agent_mesh/assets/docs/search-doc.json +1 -0
  175. solace_agent_mesh/assets/docs/sitemap.xml +1 -0
  176. solace_agent_mesh/cli/__init__.py +1 -1
  177. solace_agent_mesh/cli/commands/add_cmd/__init__.py +15 -0
  178. solace_agent_mesh/cli/commands/add_cmd/add_cmd_llm.txt +250 -0
  179. solace_agent_mesh/cli/commands/add_cmd/agent_cmd.py +659 -0
  180. solace_agent_mesh/cli/commands/add_cmd/gateway_cmd.py +322 -0
  181. solace_agent_mesh/cli/commands/add_cmd/web_add_agent_step.py +93 -0
  182. solace_agent_mesh/cli/commands/add_cmd/web_add_gateway_step.py +118 -0
  183. solace_agent_mesh/cli/commands/docs_cmd.py +57 -0
  184. solace_agent_mesh/cli/commands/eval_cmd.py +64 -0
  185. solace_agent_mesh/cli/commands/init_cmd/__init__.py +404 -0
  186. solace_agent_mesh/cli/commands/init_cmd/broker_step.py +201 -0
  187. solace_agent_mesh/cli/commands/init_cmd/directory_step.py +28 -0
  188. solace_agent_mesh/cli/commands/init_cmd/env_step.py +197 -0
  189. solace_agent_mesh/cli/commands/init_cmd/init_cmd_llm.txt +365 -0
  190. solace_agent_mesh/cli/commands/init_cmd/orchestrator_step.py +387 -0
  191. solace_agent_mesh/cli/commands/init_cmd/project_files_step.py +38 -0
  192. solace_agent_mesh/cli/commands/init_cmd/web_init_step.py +110 -0
  193. solace_agent_mesh/cli/commands/init_cmd/webui_gateway_step.py +183 -0
  194. solace_agent_mesh/cli/commands/plugin_cmd/__init__.py +18 -0
  195. solace_agent_mesh/cli/commands/plugin_cmd/add_cmd.py +372 -0
  196. solace_agent_mesh/cli/commands/plugin_cmd/build_cmd.py +86 -0
  197. solace_agent_mesh/cli/commands/plugin_cmd/catalog_cmd.py +138 -0
  198. solace_agent_mesh/cli/commands/plugin_cmd/create_cmd.py +309 -0
  199. solace_agent_mesh/cli/commands/plugin_cmd/official_registry.py +174 -0
  200. solace_agent_mesh/cli/commands/plugin_cmd/plugin_cmd_llm.txt +305 -0
  201. solace_agent_mesh/cli/commands/run_cmd.py +158 -0
  202. solace_agent_mesh/cli/main.py +17 -294
  203. solace_agent_mesh/cli/utils.py +135 -204
  204. solace_agent_mesh/client/webui/frontend/static/assets/authCallback-DvlO62me.js +1 -0
  205. solace_agent_mesh/client/webui/frontend/static/assets/client-bp6u3qVZ.js +49 -0
  206. solace_agent_mesh/client/webui/frontend/static/assets/favicon-BLgzUch9.ico +0 -0
  207. solace_agent_mesh/client/webui/frontend/static/assets/main-An0a5j5k.js +663 -0
  208. solace_agent_mesh/client/webui/frontend/static/assets/main-Bu5-4Bac.css +1 -0
  209. solace_agent_mesh/client/webui/frontend/static/auth-callback.html +14 -0
  210. solace_agent_mesh/client/webui/frontend/static/index.html +15 -0
  211. solace_agent_mesh/common/__init__.py +1 -0
  212. solace_agent_mesh/common/a2a_protocol.py +564 -0
  213. solace_agent_mesh/common/agent_registry.py +42 -0
  214. solace_agent_mesh/common/client/__init__.py +4 -0
  215. solace_agent_mesh/common/client/card_resolver.py +21 -0
  216. solace_agent_mesh/common/client/client.py +85 -0
  217. solace_agent_mesh/common/client/client_llm.txt +133 -0
  218. solace_agent_mesh/common/common_llm.txt +144 -0
  219. solace_agent_mesh/common/constants.py +1 -14
  220. solace_agent_mesh/common/middleware/__init__.py +12 -0
  221. solace_agent_mesh/common/middleware/config_resolver.py +130 -0
  222. solace_agent_mesh/common/middleware/middleware_llm.txt +174 -0
  223. solace_agent_mesh/common/middleware/registry.py +125 -0
  224. solace_agent_mesh/common/server/__init__.py +4 -0
  225. solace_agent_mesh/common/server/server.py +122 -0
  226. solace_agent_mesh/common/server/server_llm.txt +169 -0
  227. solace_agent_mesh/common/server/task_manager.py +291 -0
  228. solace_agent_mesh/common/server/utils.py +28 -0
  229. solace_agent_mesh/common/services/__init__.py +4 -0
  230. solace_agent_mesh/common/services/employee_service.py +162 -0
  231. solace_agent_mesh/common/services/identity_service.py +129 -0
  232. solace_agent_mesh/common/services/providers/__init__.py +4 -0
  233. solace_agent_mesh/common/services/providers/local_file_identity_service.py +148 -0
  234. solace_agent_mesh/common/services/providers/providers_llm.txt +113 -0
  235. solace_agent_mesh/common/services/services_llm.txt +132 -0
  236. solace_agent_mesh/common/types.py +411 -0
  237. solace_agent_mesh/common/utils/__init__.py +7 -0
  238. solace_agent_mesh/common/utils/asyncio_macos_fix.py +86 -0
  239. solace_agent_mesh/common/utils/embeds/__init__.py +33 -0
  240. solace_agent_mesh/common/utils/embeds/constants.py +55 -0
  241. solace_agent_mesh/common/utils/embeds/converter.py +452 -0
  242. solace_agent_mesh/common/utils/embeds/embeds_llm.txt +124 -0
  243. solace_agent_mesh/common/utils/embeds/evaluators.py +394 -0
  244. solace_agent_mesh/common/utils/embeds/modifiers.py +816 -0
  245. solace_agent_mesh/common/utils/embeds/resolver.py +865 -0
  246. solace_agent_mesh/common/utils/embeds/types.py +14 -0
  247. solace_agent_mesh/common/utils/in_memory_cache.py +108 -0
  248. solace_agent_mesh/common/utils/log_formatters.py +44 -0
  249. solace_agent_mesh/common/utils/mime_helpers.py +106 -0
  250. solace_agent_mesh/common/utils/push_notification_auth.py +134 -0
  251. solace_agent_mesh/common/utils/utils_llm.txt +67 -0
  252. solace_agent_mesh/config_portal/backend/common.py +66 -24
  253. solace_agent_mesh/config_portal/backend/plugin_catalog/constants.py +23 -0
  254. solace_agent_mesh/config_portal/backend/plugin_catalog/models.py +49 -0
  255. solace_agent_mesh/config_portal/backend/plugin_catalog/registry_manager.py +160 -0
  256. solace_agent_mesh/config_portal/backend/plugin_catalog/scraper.py +525 -0
  257. solace_agent_mesh/config_portal/backend/plugin_catalog_server.py +216 -0
  258. solace_agent_mesh/config_portal/backend/server.py +550 -140
  259. solace_agent_mesh/config_portal/frontend/static/client/assets/_index-DNxCwAGB.js +48 -0
  260. solace_agent_mesh/config_portal/frontend/static/client/assets/components-B7lKcHVY.js +140 -0
  261. solace_agent_mesh/config_portal/frontend/static/client/assets/{entry.client-DX1misIU.js → entry.client-CEumGClk.js} +3 -3
  262. solace_agent_mesh/config_portal/frontend/static/client/assets/index-DSo1AH_7.js +68 -0
  263. solace_agent_mesh/config_portal/frontend/static/client/assets/manifest-d2b54a97.js +1 -0
  264. solace_agent_mesh/config_portal/frontend/static/client/assets/{root-BApq5dPK.js → root-C4XmHinv.js} +2 -2
  265. solace_agent_mesh/config_portal/frontend/static/client/assets/root-DxRwaWiE.css +1 -0
  266. solace_agent_mesh/config_portal/frontend/static/client/index.html +3 -3
  267. solace_agent_mesh/core_a2a/__init__.py +1 -0
  268. solace_agent_mesh/core_a2a/core_a2a_llm.txt +88 -0
  269. solace_agent_mesh/core_a2a/service.py +331 -0
  270. solace_agent_mesh/evaluation/config_loader.py +657 -0
  271. solace_agent_mesh/evaluation/evaluator.py +667 -0
  272. solace_agent_mesh/evaluation/message_organizer.py +568 -0
  273. solace_agent_mesh/evaluation/report/benchmark_info.html +35 -0
  274. solace_agent_mesh/evaluation/report/chart_section.html +141 -0
  275. solace_agent_mesh/evaluation/report/detailed_breakdown.html +28 -0
  276. solace_agent_mesh/evaluation/report/modal.html +59 -0
  277. solace_agent_mesh/evaluation/report/modal_chart_functions.js +411 -0
  278. solace_agent_mesh/evaluation/report/modal_script.js +296 -0
  279. solace_agent_mesh/evaluation/report/modal_styles.css +340 -0
  280. solace_agent_mesh/evaluation/report/performance_metrics_styles.css +93 -0
  281. solace_agent_mesh/evaluation/report/templates/footer.html +2 -0
  282. solace_agent_mesh/evaluation/report/templates/header.html +340 -0
  283. solace_agent_mesh/evaluation/report_data_processor.py +972 -0
  284. solace_agent_mesh/evaluation/report_generator.py +613 -0
  285. solace_agent_mesh/evaluation/run.py +613 -0
  286. solace_agent_mesh/evaluation/subscriber.py +872 -0
  287. solace_agent_mesh/evaluation/summary_builder.py +775 -0
  288. solace_agent_mesh/evaluation/test_case_loader.py +714 -0
  289. solace_agent_mesh/gateway/base/__init__.py +1 -0
  290. solace_agent_mesh/gateway/base/app.py +266 -0
  291. solace_agent_mesh/gateway/base/base_llm.txt +119 -0
  292. solace_agent_mesh/gateway/base/component.py +1542 -0
  293. solace_agent_mesh/gateway/base/task_context.py +74 -0
  294. solace_agent_mesh/gateway/gateway_llm.txt +125 -0
  295. solace_agent_mesh/gateway/http_sse/app.py +190 -0
  296. solace_agent_mesh/gateway/http_sse/component.py +1602 -0
  297. solace_agent_mesh/gateway/http_sse/components/__init__.py +7 -0
  298. solace_agent_mesh/gateway/http_sse/components/components_llm.txt +65 -0
  299. solace_agent_mesh/gateway/http_sse/components/visualization_forwarder_component.py +108 -0
  300. solace_agent_mesh/gateway/http_sse/dependencies.py +316 -0
  301. solace_agent_mesh/gateway/http_sse/http_sse_llm.txt +63 -0
  302. solace_agent_mesh/gateway/http_sse/main.py +442 -0
  303. solace_agent_mesh/gateway/http_sse/routers/__init__.py +4 -0
  304. solace_agent_mesh/gateway/http_sse/routers/agents.py +41 -0
  305. solace_agent_mesh/gateway/http_sse/routers/artifacts.py +821 -0
  306. solace_agent_mesh/gateway/http_sse/routers/auth.py +212 -0
  307. solace_agent_mesh/gateway/http_sse/routers/config.py +55 -0
  308. solace_agent_mesh/gateway/http_sse/routers/people.py +69 -0
  309. solace_agent_mesh/gateway/http_sse/routers/routers_llm.txt +37 -0
  310. solace_agent_mesh/gateway/http_sse/routers/sessions.py +80 -0
  311. solace_agent_mesh/gateway/http_sse/routers/sse.py +138 -0
  312. solace_agent_mesh/gateway/http_sse/routers/tasks.py +294 -0
  313. solace_agent_mesh/gateway/http_sse/routers/users.py +59 -0
  314. solace_agent_mesh/gateway/http_sse/routers/visualization.py +1131 -0
  315. solace_agent_mesh/gateway/http_sse/services/__init__.py +4 -0
  316. solace_agent_mesh/gateway/http_sse/services/agent_service.py +69 -0
  317. solace_agent_mesh/gateway/http_sse/services/people_service.py +158 -0
  318. solace_agent_mesh/gateway/http_sse/services/services_llm.txt +179 -0
  319. solace_agent_mesh/gateway/http_sse/services/task_service.py +121 -0
  320. solace_agent_mesh/gateway/http_sse/session_manager.py +187 -0
  321. solace_agent_mesh/gateway/http_sse/sse_manager.py +328 -0
  322. solace_agent_mesh/llm.txt +228 -0
  323. solace_agent_mesh/llm_detail.txt +2835 -0
  324. solace_agent_mesh/templates/agent_template.yaml +53 -0
  325. solace_agent_mesh/templates/eval_backend_template.yaml +54 -0
  326. solace_agent_mesh/templates/gateway_app_template.py +73 -0
  327. solace_agent_mesh/templates/gateway_component_template.py +400 -0
  328. solace_agent_mesh/templates/gateway_config_template.yaml +43 -0
  329. solace_agent_mesh/templates/main_orchestrator.yaml +55 -0
  330. solace_agent_mesh/templates/plugin_agent_config_template.yaml +122 -0
  331. solace_agent_mesh/templates/plugin_custom_config_template.yaml +27 -0
  332. solace_agent_mesh/templates/plugin_custom_template.py +10 -0
  333. solace_agent_mesh/templates/plugin_gateway_config_template.yaml +63 -0
  334. solace_agent_mesh/templates/plugin_pyproject_template.toml +33 -0
  335. solace_agent_mesh/templates/plugin_readme_template.md +34 -0
  336. solace_agent_mesh/templates/plugin_tools_template.py +224 -0
  337. solace_agent_mesh/templates/shared_config.yaml +66 -0
  338. solace_agent_mesh/templates/templates_llm.txt +147 -0
  339. solace_agent_mesh/templates/webui.yaml +53 -0
  340. solace_agent_mesh-1.0.1.dist-info/METADATA +432 -0
  341. solace_agent_mesh-1.0.1.dist-info/RECORD +359 -0
  342. solace_agent_mesh-1.0.1.dist-info/entry_points.txt +3 -0
  343. {solace_agent_mesh-0.2.3.dist-info → solace_agent_mesh-1.0.1.dist-info}/licenses/LICENSE +1 -1
  344. solace_agent_mesh/agents/base_agent_component.py +0 -226
  345. solace_agent_mesh/agents/global/actions/agent_state_change.py +0 -54
  346. solace_agent_mesh/agents/global/actions/clear_history.py +0 -32
  347. solace_agent_mesh/agents/global/actions/convert_file_to_markdown.py +0 -160
  348. solace_agent_mesh/agents/global/actions/create_file.py +0 -70
  349. solace_agent_mesh/agents/global/actions/error_action.py +0 -45
  350. solace_agent_mesh/agents/global/actions/plantuml_diagram.py +0 -163
  351. solace_agent_mesh/agents/global/actions/plotly_graph.py +0 -152
  352. solace_agent_mesh/agents/global/actions/retrieve_file.py +0 -51
  353. solace_agent_mesh/agents/global/global_agent_component.py +0 -38
  354. solace_agent_mesh/agents/image_processing/actions/create_image.py +0 -75
  355. solace_agent_mesh/agents/image_processing/actions/describe_image.py +0 -115
  356. solace_agent_mesh/agents/image_processing/image_processing_agent_component.py +0 -23
  357. solace_agent_mesh/agents/slack/__init__.py +0 -1
  358. solace_agent_mesh/agents/slack/actions/__init__.py +0 -1
  359. solace_agent_mesh/agents/slack/actions/post_message.py +0 -177
  360. solace_agent_mesh/agents/slack/slack_agent_component.py +0 -59
  361. solace_agent_mesh/agents/web_request/actions/do_image_search.py +0 -84
  362. solace_agent_mesh/agents/web_request/actions/do_news_search.py +0 -47
  363. solace_agent_mesh/agents/web_request/actions/do_suggestion_search.py +0 -34
  364. solace_agent_mesh/agents/web_request/actions/do_web_request.py +0 -135
  365. solace_agent_mesh/agents/web_request/actions/download_file.py +0 -69
  366. solace_agent_mesh/agents/web_request/web_request_agent_component.py +0 -33
  367. solace_agent_mesh/assets/web-visualizer/assets/index-D0qORgkg.css +0 -1
  368. solace_agent_mesh/assets/web-visualizer/assets/index-DnDr1pnu.js +0 -109
  369. solace_agent_mesh/assets/web-visualizer/index.html +0 -14
  370. solace_agent_mesh/assets/web-visualizer/vite.svg +0 -1
  371. solace_agent_mesh/cli/commands/add/__init__.py +0 -3
  372. solace_agent_mesh/cli/commands/add/add.py +0 -88
  373. solace_agent_mesh/cli/commands/add/agent.py +0 -110
  374. solace_agent_mesh/cli/commands/add/copy_from_plugin.py +0 -92
  375. solace_agent_mesh/cli/commands/add/gateway.py +0 -374
  376. solace_agent_mesh/cli/commands/build.py +0 -670
  377. solace_agent_mesh/cli/commands/chat/__init__.py +0 -3
  378. solace_agent_mesh/cli/commands/chat/chat.py +0 -361
  379. solace_agent_mesh/cli/commands/config.py +0 -29
  380. solace_agent_mesh/cli/commands/init/__init__.py +0 -3
  381. solace_agent_mesh/cli/commands/init/ai_provider_step.py +0 -93
  382. solace_agent_mesh/cli/commands/init/broker_step.py +0 -99
  383. solace_agent_mesh/cli/commands/init/builtin_agent_step.py +0 -83
  384. solace_agent_mesh/cli/commands/init/check_if_already_done.py +0 -13
  385. solace_agent_mesh/cli/commands/init/create_config_file_step.py +0 -65
  386. solace_agent_mesh/cli/commands/init/create_other_project_files_step.py +0 -147
  387. solace_agent_mesh/cli/commands/init/file_service_step.py +0 -73
  388. solace_agent_mesh/cli/commands/init/init.py +0 -92
  389. solace_agent_mesh/cli/commands/init/project_structure_step.py +0 -16
  390. solace_agent_mesh/cli/commands/init/web_init_step.py +0 -32
  391. solace_agent_mesh/cli/commands/plugin/__init__.py +0 -3
  392. solace_agent_mesh/cli/commands/plugin/add.py +0 -100
  393. solace_agent_mesh/cli/commands/plugin/build.py +0 -268
  394. solace_agent_mesh/cli/commands/plugin/create.py +0 -117
  395. solace_agent_mesh/cli/commands/plugin/plugin.py +0 -124
  396. solace_agent_mesh/cli/commands/plugin/remove.py +0 -73
  397. solace_agent_mesh/cli/commands/run.py +0 -68
  398. solace_agent_mesh/cli/commands/visualizer.py +0 -138
  399. solace_agent_mesh/cli/config.py +0 -85
  400. solace_agent_mesh/common/action.py +0 -91
  401. solace_agent_mesh/common/action_list.py +0 -37
  402. solace_agent_mesh/common/action_response.py +0 -340
  403. solace_agent_mesh/common/mysql_database.py +0 -40
  404. solace_agent_mesh/common/postgres_database.py +0 -85
  405. solace_agent_mesh/common/prompt_templates.py +0 -28
  406. solace_agent_mesh/common/stimulus_utils.py +0 -152
  407. solace_agent_mesh/common/time.py +0 -24
  408. solace_agent_mesh/common/utils.py +0 -712
  409. solace_agent_mesh/config_portal/frontend/static/client/assets/_index-DMmCawWe.js +0 -42
  410. solace_agent_mesh/config_portal/frontend/static/client/assets/components-ZIfdTbrV.js +0 -191
  411. solace_agent_mesh/config_portal/frontend/static/client/assets/index-BJHAE5s4.js +0 -17
  412. solace_agent_mesh/config_portal/frontend/static/client/assets/manifest-dd988f05.js +0 -1
  413. solace_agent_mesh/config_portal/frontend/static/client/assets/root-DX4gQ516.css +0 -1
  414. solace_agent_mesh/configs/agent_global.yaml +0 -74
  415. solace_agent_mesh/configs/agent_image_processing.yaml +0 -82
  416. solace_agent_mesh/configs/agent_slack.yaml +0 -64
  417. solace_agent_mesh/configs/agent_web_request.yaml +0 -75
  418. solace_agent_mesh/configs/conversation_to_file.yaml +0 -56
  419. solace_agent_mesh/configs/error_catcher.yaml +0 -56
  420. solace_agent_mesh/configs/monitor.yaml +0 -0
  421. solace_agent_mesh/configs/monitor_stim_and_errors_to_slack.yaml +0 -109
  422. solace_agent_mesh/configs/monitor_user_feedback.yaml +0 -58
  423. solace_agent_mesh/configs/orchestrator.yaml +0 -241
  424. solace_agent_mesh/configs/service_embedding.yaml +0 -81
  425. solace_agent_mesh/configs/service_llm.yaml +0 -265
  426. solace_agent_mesh/configs/visualize_websocket.yaml +0 -55
  427. solace_agent_mesh/gateway/components/gateway_base.py +0 -47
  428. solace_agent_mesh/gateway/components/gateway_input.py +0 -278
  429. solace_agent_mesh/gateway/components/gateway_output.py +0 -298
  430. solace_agent_mesh/gateway/identity/bamboohr_identity.py +0 -18
  431. solace_agent_mesh/gateway/identity/identity_base.py +0 -10
  432. solace_agent_mesh/gateway/identity/identity_provider.py +0 -60
  433. solace_agent_mesh/gateway/identity/no_identity.py +0 -9
  434. solace_agent_mesh/gateway/identity/passthru_identity.py +0 -9
  435. solace_agent_mesh/monitors/base_monitor_component.py +0 -26
  436. solace_agent_mesh/monitors/feedback/user_feedback_monitor.py +0 -75
  437. solace_agent_mesh/monitors/stim_and_errors/stim_and_error_monitor.py +0 -560
  438. solace_agent_mesh/orchestrator/__init__.py +0 -0
  439. solace_agent_mesh/orchestrator/action_manager.py +0 -237
  440. solace_agent_mesh/orchestrator/components/__init__.py +0 -0
  441. solace_agent_mesh/orchestrator/components/orchestrator_action_manager_timeout_component.py +0 -58
  442. solace_agent_mesh/orchestrator/components/orchestrator_action_response_component.py +0 -179
  443. solace_agent_mesh/orchestrator/components/orchestrator_register_component.py +0 -107
  444. solace_agent_mesh/orchestrator/components/orchestrator_stimulus_processor_component.py +0 -527
  445. solace_agent_mesh/orchestrator/components/orchestrator_streaming_output_component.py +0 -260
  446. solace_agent_mesh/orchestrator/orchestrator_main.py +0 -172
  447. solace_agent_mesh/orchestrator/orchestrator_prompt.py +0 -539
  448. solace_agent_mesh/services/__init__.py +0 -0
  449. solace_agent_mesh/services/authorization/providers/base_authorization_provider.py +0 -56
  450. solace_agent_mesh/services/bamboo_hr_service/__init__.py +0 -3
  451. solace_agent_mesh/services/bamboo_hr_service/bamboo_hr.py +0 -182
  452. solace_agent_mesh/services/common/__init__.py +0 -4
  453. solace_agent_mesh/services/common/auto_expiry.py +0 -45
  454. solace_agent_mesh/services/common/singleton.py +0 -18
  455. solace_agent_mesh/services/file_service/__init__.py +0 -14
  456. solace_agent_mesh/services/file_service/file_manager/__init__.py +0 -0
  457. solace_agent_mesh/services/file_service/file_manager/bucket_file_manager.py +0 -149
  458. solace_agent_mesh/services/file_service/file_manager/file_manager_base.py +0 -162
  459. solace_agent_mesh/services/file_service/file_manager/memory_file_manager.py +0 -64
  460. solace_agent_mesh/services/file_service/file_manager/volume_file_manager.py +0 -106
  461. solace_agent_mesh/services/file_service/file_service.py +0 -437
  462. solace_agent_mesh/services/file_service/file_service_constants.py +0 -54
  463. solace_agent_mesh/services/file_service/file_transformations.py +0 -141
  464. solace_agent_mesh/services/file_service/file_utils.py +0 -324
  465. solace_agent_mesh/services/file_service/transformers/__init__.py +0 -5
  466. solace_agent_mesh/services/history_service/__init__.py +0 -3
  467. solace_agent_mesh/services/history_service/history_providers/__init__.py +0 -0
  468. solace_agent_mesh/services/history_service/history_providers/base_history_provider.py +0 -54
  469. solace_agent_mesh/services/history_service/history_providers/file_history_provider.py +0 -74
  470. solace_agent_mesh/services/history_service/history_providers/index.py +0 -40
  471. solace_agent_mesh/services/history_service/history_providers/memory_history_provider.py +0 -33
  472. solace_agent_mesh/services/history_service/history_providers/mongodb_history_provider.py +0 -66
  473. solace_agent_mesh/services/history_service/history_providers/redis_history_provider.py +0 -66
  474. solace_agent_mesh/services/history_service/history_providers/sql_history_provider.py +0 -93
  475. solace_agent_mesh/services/history_service/history_service.py +0 -413
  476. solace_agent_mesh/services/history_service/long_term_memory/__init__.py +0 -0
  477. solace_agent_mesh/services/history_service/long_term_memory/long_term_memory.py +0 -399
  478. solace_agent_mesh/services/llm_service/components/llm_request_component.py +0 -340
  479. solace_agent_mesh/services/llm_service/components/llm_service_component_base.py +0 -152
  480. solace_agent_mesh/services/middleware_service/__init__.py +0 -0
  481. solace_agent_mesh/services/middleware_service/middleware_service.py +0 -20
  482. solace_agent_mesh/templates/action.py +0 -38
  483. solace_agent_mesh/templates/agent.py +0 -29
  484. solace_agent_mesh/templates/agent.yaml +0 -70
  485. solace_agent_mesh/templates/gateway-config-template.yaml +0 -6
  486. solace_agent_mesh/templates/gateway-default-config.yaml +0 -28
  487. solace_agent_mesh/templates/gateway-flows.yaml +0 -78
  488. solace_agent_mesh/templates/gateway-header.yaml +0 -16
  489. solace_agent_mesh/templates/gateway_base.py +0 -15
  490. solace_agent_mesh/templates/gateway_input.py +0 -98
  491. solace_agent_mesh/templates/gateway_output.py +0 -71
  492. solace_agent_mesh/templates/plugin-gateway-default-config.yaml +0 -29
  493. solace_agent_mesh/templates/plugin-pyproject.toml +0 -30
  494. solace_agent_mesh/templates/rest-api-default-config.yaml +0 -31
  495. solace_agent_mesh/templates/rest-api-flows.yaml +0 -81
  496. solace_agent_mesh/templates/slack-default-config.yaml +0 -16
  497. solace_agent_mesh/templates/slack-flows.yaml +0 -81
  498. solace_agent_mesh/templates/solace-agent-mesh-default.yaml +0 -86
  499. solace_agent_mesh/templates/solace-agent-mesh-plugin-default.yaml +0 -8
  500. solace_agent_mesh/templates/web-default-config.yaml +0 -10
  501. solace_agent_mesh/templates/web-flows.yaml +0 -76
  502. solace_agent_mesh/tools/__init__.py +0 -0
  503. solace_agent_mesh/tools/components/__init__.py +0 -0
  504. solace_agent_mesh/tools/components/conversation_formatter.py +0 -111
  505. solace_agent_mesh/tools/components/file_resolver_component.py +0 -58
  506. solace_agent_mesh/tools/config/runtime_config.py +0 -26
  507. solace_agent_mesh-0.2.3.dist-info/METADATA +0 -172
  508. solace_agent_mesh-0.2.3.dist-info/RECORD +0 -193
  509. solace_agent_mesh-0.2.3.dist-info/entry_points.txt +0 -3
  510. /solace_agent_mesh/{agents → agent}/__init__.py +0 -0
  511. /solace_agent_mesh/{agents/global → agent/adk}/__init__.py +0 -0
  512. /solace_agent_mesh/{agents/global/actions → agent/protocol}/__init__.py +0 -0
  513. /solace_agent_mesh/{agents/image_processing → agent/sac}/__init__.py +0 -0
  514. /solace_agent_mesh/{agents/image_processing/actions → agent/utils}/__init__.py +0 -0
  515. /solace_agent_mesh/{agents/web_request → config_portal/backend/plugin_catalog}/__init__.py +0 -0
  516. /solace_agent_mesh/{agents/web_request/actions → evaluation}/__init__.py +0 -0
  517. /solace_agent_mesh/gateway/{components → http_sse}/__init__.py +0 -0
  518. {solace_agent_mesh-0.2.3.dist-info → solace_agent_mesh-1.0.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,667 @@
1
+ """
2
+ Refactored evaluator with improved structure and readability.
3
+ This module evaluates AI model performance against test cases using multiple evaluation strategies.
4
+ """
5
+
6
+ import concurrent.futures
7
+ import json
8
+ import os
9
+ import re
10
+ import sys
11
+ from abc import ABC, abstractmethod
12
+ from collections import defaultdict
13
+ from dataclasses import dataclass, field
14
+ from typing import Dict, List, Optional, Any, Tuple
15
+ import logging
16
+
17
+ import numpy as np
18
+ from rouge import Rouge
19
+ import litellm
20
+
21
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
22
+ from evaluation.config_loader import ConfigLoader
23
+ from evaluation.test_case_loader import load_test_case
24
+
25
# Configure the root logger at import time so INFO-level progress messages
# emitted by this module are shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Absolute path of the directory containing this module; the results
# directory is resolved relative to it (see ConfigurationService).
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
29
+
30
+
31
@dataclass
class EvaluationResult:
    """Outcome of evaluating one run of one test case.

    Score fields stay ``None`` when the corresponding evaluator did not
    produce a value; such fields are omitted from the serialized form.
    """

    run_number: int
    test_case_id: str
    test_case_path: str
    tool_match_score: Optional[float] = None
    response_match_score: Optional[float] = None
    llm_eval_score: Optional[float] = None
    llm_eval_reasoning: Optional[str] = None
    duration_seconds: Optional[float] = None
    errors: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dictionary describing this run.

        The identifying fields and the duration are always present; the
        score entries and ``errors`` appear only when they carry a value.
        """
        payload: Dict[str, Any] = {
            "run": self.run_number,
            "test_case_id": self.test_case_id,
            "test_case_path": self.test_case_path,
            "duration_seconds": self.duration_seconds,
        }

        # Plain numeric scores share the same "include if set" rule.
        plain_scores = (
            ("tool_match", self.tool_match_score),
            ("response_match", self.response_match_score),
        )
        for key, value in plain_scores:
            if value is not None:
                payload[key] = value

        # The LLM verdict is nested so the reasoning travels with the score.
        if self.llm_eval_score is not None:
            payload["llm_eval"] = {
                "score": self.llm_eval_score,
                "reasoning": self.llm_eval_reasoning,
            }

        if self.errors:
            payload["errors"] = self.errors

        return payload
70
+
71
+
72
@dataclass
class ScoreStatistics:
    """Average plus five-number summary for a collection of scores."""

    average: float
    distribution: Dict[str, float]

    @classmethod
    def from_scores(cls, scores: List[float]) -> "ScoreStatistics":
        """Build the summary for ``scores``.

        An empty list yields an all-zero summary instead of raising.
        """
        if scores:
            five_number_summary = {
                "min": float(np.min(scores)),
                "q1": float(np.percentile(scores, 25)),
                "q2": float(np.median(scores)),
                "q3": float(np.percentile(scores, 75)),
                "max": float(np.max(scores)),
            }
            return cls(
                average=float(np.mean(scores)),
                distribution=five_number_summary,
            )

        # Nothing to summarize: report zeros for every statistic.
        zeroed = {"min": 0.0, "q1": 0.0, "q2": 0.0, "q3": 0.0, "max": 0.0}
        return cls(average=0.0, distribution=zeroed)
98
+
99
+
100
@dataclass
class TestCaseResults:
    """Per-test-case roll-up of every evaluated run and its score statistics."""

    test_case_id: str
    category: str
    runs: List[EvaluationResult]
    average_duration: float
    tool_match_scores: ScoreStatistics
    response_match_scores: ScoreStatistics
    llm_eval_scores: ScoreStatistics

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dictionary for this test case."""

        def summarize(stats) -> Dict[str, Any]:
            # Every score family is serialized with the same two keys.
            return {
                "average": stats.average,
                "distribution": stats.distribution,
            }

        serialized_runs = [run.to_dict() for run in self.runs]
        return {
            "test_case_id": self.test_case_id,
            "category": self.category,
            "runs": serialized_runs,
            "average_duration": self.average_duration,
            "tool_match_scores": summarize(self.tool_match_scores),
            "response_match_scores": summarize(self.response_match_scores),
            "llm_eval_scores": summarize(self.llm_eval_scores),
        }
132
+
133
+
134
@dataclass
class ModelResults:
    """Everything recorded for one model: its name, timing, and test cases."""

    model_name: str
    total_execution_time: Optional[float]
    test_cases: List[TestCaseResults]

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dictionary for this model."""
        serialized_cases = [case.to_dict() for case in self.test_cases]
        return dict(
            model_name=self.model_name,
            total_execution_time=self.total_execution_time,
            test_cases=serialized_cases,
        )
149
+
150
+
151
class ConfigurationService:
    """Lazy, caching facade over ``ConfigLoader``."""

    def __init__(self, config_path: str):
        self.config_loader = ConfigLoader(config_path)
        # Both values are loaded on first access and then reused.
        self._config_cache = None
        self._evaluation_settings_cache = None

    def get_config(self) -> Dict[str, Any]:
        """Return the main configuration, loading it on first use."""
        cached = self._config_cache
        if cached is None:
            cached = self._config_cache = self.config_loader.load_config()
        return cached

    def get_evaluation_settings(self) -> Dict[str, Any]:
        """Return the evaluation settings, loading them on first use."""
        cached = self._evaluation_settings_cache
        if cached is None:
            cached = self.config_loader.get_evaluation_settings()
            self._evaluation_settings_cache = cached
        return cached

    def get_results_path(self) -> str:
        """Return ``<module dir>/results/<results_dir_name>``."""
        dir_name = self.get_config()["results_dir_name"]
        return os.path.join(SCRIPT_DIR, "results", dir_name)
178
+
179
+
180
class FileService:
    """Handles file I/O operations.

    All helpers are static; the class only groups related functions.
    """

    @staticmethod
    def load_json(filepath: str) -> Any:
        """Load and return the JSON document stored at ``filepath``.

        Raises:
            FileNotFoundError: If ``filepath`` does not exist.
            json.JSONDecodeError: If the file content is not valid JSON.
        """
        try:
            # Read as UTF-8 explicitly so parsing does not depend on the
            # platform's locale encoding.
            with open(filepath, "r", encoding="utf-8") as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            logger.error(f"Failed to load JSON from {filepath}: {e}")
            raise

    @staticmethod
    def save_json(data: Any, filepath: str):
        """Serialize ``data`` as indented JSON to ``filepath``.

        Missing parent directories are created. Any failure is logged and
        re-raised to the caller.
        """
        try:
            parent_dir = os.path.dirname(filepath)
            # A bare filename has an empty dirname, and os.makedirs("")
            # raises FileNotFoundError, so only create real directories.
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=4)
        except Exception as e:
            logger.error(f"Failed to save JSON to {filepath}: {e}")
            raise

    @staticmethod
    def file_exists(filepath: str) -> bool:
        """Return True when ``filepath`` exists on disk."""
        return os.path.exists(filepath)
208
+
209
+
210
class StatisticsService:
    """Stateless helpers for summarizing scores and durations."""

    @staticmethod
    def calculate_score_statistics(scores: List[float]) -> ScoreStatistics:
        """Summarize ``scores`` by delegating to ``ScoreStatistics.from_scores``."""
        summary = ScoreStatistics.from_scores(scores)
        return summary

    @staticmethod
    def calculate_average_duration(durations: List[float]) -> float:
        """Return the mean of ``durations``, or 0.0 when the list is empty."""
        return float(np.mean(durations)) if durations else 0.0
224
+
225
+
226
class EvaluationStrategy(ABC):
    """Interface implemented by every evaluator in this module."""

    @abstractmethod
    def evaluate(
        self, test_case: Dict[str, Any], summary_data: Dict[str, Any]
    ) -> Optional[float]:
        """Score one run of ``test_case`` using its ``summary_data``.

        Concrete strategies must override this method.
        """
        return None
235
+
236
+
237
class ToolMatchEvaluator(EvaluationStrategy):
    """Scores a run by the fraction of expected tools that were called."""

    def evaluate(
        self, test_case: Dict[str, Any], summary_data: Dict[str, Any]
    ) -> Optional[float]:
        """Return the share of expected tools present in the run's tool calls.

        Returns 1.0 when no tools are expected, and ``None`` (after logging
        a warning) when the test case or summary is malformed.
        """
        try:
            wanted_names = test_case["evaluation"]["expected_tools"]
            called_names = [
                call["tool_name"] for call in summary_data.get("tool_calls", [])
            ]
            wanted = set(wanted_names)
            called = set(called_names)
        except (KeyError, TypeError) as e:
            logger.warning(f"Error in tool match evaluation: {e}")
            return None

        # With nothing expected there is nothing to miss.
        if not wanted:
            return 1.0

        return len(wanted & called) / len(wanted)
262
+
263
+
264
class ResponseMatchEvaluator(EvaluationStrategy):
    """Scores the final message against the expected response with ROUGE."""

    def __init__(self):
        self.rouge = Rouge()

    def evaluate(
        self, test_case: Dict[str, Any], summary_data: Dict[str, Any]
    ) -> Optional[float]:
        """Return a weighted blend of ROUGE-1, ROUGE-2 and ROUGE-L f-scores.

        Returns 0.0 when either text is empty or when scoring fails.
        """
        try:
            reference = test_case["evaluation"]["expected_response"]
            candidate = summary_data.get("final_message", "")

            if not (candidate and reference):
                return 0.0

            rouge_scores = self.rouge.get_scores(candidate, reference)[0]

            def f_score(metric: str) -> float:
                # Missing metrics count as a zero f-score.
                return rouge_scores.get(metric, {}).get("f", 0.0)

            unigram_f = f_score("rouge-1")
            bigram_f = f_score("rouge-2")
            longest_f = f_score("rouge-l")

            # Weights: 20% ROUGE-1, 30% ROUGE-2, 50% ROUGE-L.
            return (0.2 * unigram_f) + (0.3 * bigram_f) + (0.5 * longest_f)

        except (ValueError, KeyError, TypeError) as e:
            logger.warning(f"Error in response match evaluation: {e}")
            return 0.0
295
+
296
+
297
class LLMEvaluator(EvaluationStrategy):
    """Evaluates responses using an LLM judge."""

    def __init__(self, llm_config: Dict[str, Any]):
        """Read the judge model settings from ``llm_config``.

        Raises:
            ValueError: If the model name, API key, or endpoint is missing.
        """
        self.model = llm_config.get("LLM_SERVICE_PLANNING_MODEL_NAME")
        self.api_key = llm_config.get("LLM_SERVICE_API_KEY")
        self.api_base = llm_config.get("LLM_SERVICE_ENDPOINT")

        if not all([self.model, self.api_key, self.api_base]):
            raise ValueError(
                "LLM evaluator requires model, api_key, and api_base configuration"
            )

    def evaluate(
        self, test_case: Dict[str, Any], summary_data: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Ask the judge model to grade the run.

        Returns:
            ``{"score": float, "reasoning": str}`` on success, or ``None``
            when anything fails (the error is logged, not raised).
        """
        try:
            query = test_case["query"]
            expected_response = test_case["evaluation"]["expected_response"]
            actual_response = summary_data.get("final_message", "")
            criterion = test_case["evaluation"]["criterion"]
            input_artifacts = summary_data.get("input_artifacts", [])
            output_artifacts = summary_data.get("output_artifacts", [])

            prompt = self._build_evaluation_prompt(
                query,
                expected_response,
                actual_response,
                criterion,
                input_artifacts,
                output_artifacts,
            )

            response = litellm.completion(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                api_key=self.api_key,
                base_url=self.api_base,
            )

            response_content = response.choices[0].message.content.strip()
            score = self._extract_score(response_content)
            reasoning = self._extract_reasoning(response_content)

            return {"score": score, "reasoning": reasoning}

        except Exception as e:
            # Deliberately best-effort: a failed judge call must not abort
            # the evaluation of the remaining runs.
            logger.error(f"Error in LLM evaluation: {e}")
            return None

    def _build_evaluation_prompt(
        self,
        query: str,
        expected_response: str,
        actual_response: str,
        criterion: str,
        input_artifacts: List[Dict],
        output_artifacts: List[Dict],
    ) -> str:
        """Build the evaluation prompt for the LLM."""
        return f"""
        Original Query: {query}
        Expected Response: {expected_response}
        Actual Response: {actual_response}
        Criterion: {criterion}
        Input Artifacts: {input_artifacts}
        Output Artifacts: {output_artifacts}

        Based on the criterion, please evaluate the actual response.
        Format your response exactly as:
        Score: [0.0-1.0]
        Reasoning: [Your detailed explanation of why you gave this score, considering both the response and any artifacts created]

        Provide a score from 0.0 to 1.0 where:
        - 1.0 = Excellent: Fully meets the criterion and expectations
        - 0.8-0.9 = Good: Mostly meets the criterion with minor issues
        - 0.6-0.7 = Adequate: Partially meets the criterion but has notable gaps
        - 0.4-0.5 = Poor: Minimally meets the criterion with significant issues
        - 0.0-0.3 = Very Poor: Fails to meet the criterion
        """

    def _extract_score(self, llm_response: str) -> float:
        """Extract a score in ``[0.0, 1.0]`` from the judge's reply.

        An explicit ``Score: X`` line wins and is clamped into range.
        Otherwise the first standalone number that already lies in
        ``[0.0, 1.0]`` is used. Returns 0.0 when nothing usable is found.
        """
        # Try to find "Score: X.X" pattern first
        score_match = re.search(
            r"Score:\s*([0-9]*\.?[0-9]+)", llm_response, re.IGNORECASE
        )
        if score_match:
            try:
                return max(0.0, min(1.0, float(score_match.group(1))))
            except ValueError:
                pass

        # Fallback: scan every candidate number, not just the first one, so
        # an unrelated leading figure (e.g. "15 factors") does not hide a
        # valid score that appears later in the reply.
        for number_match in re.finditer(r"\b([0-1]\.?[0-9]*)\b", llm_response):
            try:
                candidate = float(number_match.group(1))
            except ValueError:
                continue
            if 0.0 <= candidate <= 1.0:
                return candidate

        return 0.0

    def _extract_reasoning(self, llm_response: str) -> str:
        """Return the text after ``Reasoning:``, or the whole reply if absent."""
        reasoning_match = re.search(
            r"Reasoning:\s*(.+)", llm_response, re.IGNORECASE | re.DOTALL
        )
        if reasoning_match:
            return reasoning_match.group(1).strip()

        return llm_response.strip()
413
+
414
+
415
class RunEvaluator:
    """Scores one recorded run with whichever evaluators are enabled."""

    def __init__(self, evaluation_settings: Dict[str, Any]):
        self.evaluation_settings = evaluation_settings
        self.file_service = FileService()

        # Each evaluator stays None unless its section is enabled.
        self.tool_evaluator = None
        if evaluation_settings["tool_match"]["enabled"]:
            self.tool_evaluator = ToolMatchEvaluator()

        self.response_evaluator = None
        if evaluation_settings["response_match"]["enabled"]:
            self.response_evaluator = ResponseMatchEvaluator()

        self.llm_evaluator = None
        if evaluation_settings["llm_evaluator"]["enabled"]:
            try:
                judge_env = evaluation_settings["llm_evaluator"]["env"]
                self.llm_evaluator = LLMEvaluator(judge_env)
            except Exception as e:
                # A misconfigured judge disables LLM scoring only.
                logger.error(f"Failed to initialize LLM evaluator: {e}")

    def evaluate_run(
        self,
        run_number: int,
        run_path: str,
        test_case: Dict[str, Any],
        test_case_path: str,
    ) -> Optional[EvaluationResult]:
        """Evaluate the run stored under ``run_path``.

        Returns ``None`` when the run's ``summary.json`` is missing or
        cannot be loaded; otherwise returns the populated result.
        """
        logger.info(
            f" - Evaluating run {run_number} for test case {test_case['test_case_id']}"
        )

        summary_file = os.path.join(run_path, "summary.json")
        if not self.file_service.file_exists(summary_file):
            logger.warning(
                f" Summary file not found for run {run_number}, skipping."
            )
            return None

        try:
            summary = self.file_service.load_json(summary_file)
        except Exception as e:
            logger.error(f" Error loading summary.json for run {run_number}: {e}")
            return None

        outcome = EvaluationResult(
            run_number=run_number,
            test_case_id=test_case["test_case_id"],
            test_case_path=test_case_path,
            duration_seconds=summary.get("duration_seconds"),
        )

        if self.tool_evaluator:
            outcome.tool_match_score = self.tool_evaluator.evaluate(
                test_case, summary
            )

        if self.response_evaluator:
            outcome.response_match_score = self.response_evaluator.evaluate(
                test_case, summary
            )

        if self.llm_evaluator:
            verdict = self.llm_evaluator.evaluate(test_case, summary)
            if verdict:
                outcome.llm_eval_score = verdict["score"]
                outcome.llm_eval_reasoning = verdict["reasoning"]

        return outcome
494
+
495
+
496
class ModelEvaluator:
    """Evaluates all runs for a single model."""

    def __init__(self, config: Dict[str, Any], evaluation_settings: Dict[str, Any]):
        self.config = config
        self.evaluation_settings = evaluation_settings
        self.run_evaluator = RunEvaluator(evaluation_settings)
        self.statistics_service = StatisticsService()

    def evaluate_model(self, model_name: str, base_results_path: str) -> ModelResults:
        """Evaluate every configured test-case run for ``model_name``.

        Runs are evaluated concurrently on a thread pool. A run whose
        evaluation raises is logged and skipped. The returned test cases
        follow the configured test-case order and each case's runs are
        sorted by run number, so the output does not depend on thread
        completion timing.
        """
        logger.info(f"Evaluating model: {model_name}")

        model_results_path = os.path.join(base_results_path, model_name)

        # Collect all evaluation tasks
        tasks = self._collect_evaluation_tasks(model_results_path)

        # Run evaluations in parallel
        model_results_data = defaultdict(list)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future_to_run = {
                executor.submit(self.run_evaluator.evaluate_run, *task): task
                for task in tasks
            }

            for future in concurrent.futures.as_completed(future_to_run):
                try:
                    result = future.result()
                    if result:
                        model_results_data[result.test_case_id].append(result)
                except Exception as e:
                    logger.error(f"An error occurred during evaluation: {e}")

        # as_completed yields in completion order, which varies between
        # executions; restore the configured test-case order instead.
        ordered_ids = list(
            dict.fromkeys(task[2]["test_case_id"] for task in tasks)
        )

        # Aggregate results by test case
        test_case_results = []
        for test_case_id in ordered_ids:
            # .get avoids creating empty entries in the defaultdict.
            runs = sorted(
                model_results_data.get(test_case_id, []),
                key=lambda run: run.run_number,
            )
            if runs:
                test_case_results.append(
                    self._aggregate_test_case_results(test_case_id, runs)
                )

        return ModelResults(
            model_name=model_name,
            total_execution_time=None,  # Will be set by orchestrator
            test_cases=test_case_results,
        )

    def _collect_evaluation_tasks(
        self, model_results_path: str
    ) -> List[Tuple[int, str, Dict[str, Any], str]]:
        """Build one ``(run_number, run_path, test_case, test_case_path)``
        tuple per configured test case and run (runs are numbered from 1).
        """
        tasks = []

        for test_case_path in self.config["test_cases"]:
            test_case = load_test_case(test_case_path)
            test_case_id = test_case["test_case_id"]
            test_case_results_path = os.path.join(model_results_path, test_case_id)

            for i in range(1, self.config["runs"] + 1):
                run_path = os.path.join(test_case_results_path, f"run_{i}")
                tasks.append((i, run_path, test_case, test_case_path))

        return tasks

    def _aggregate_test_case_results(
        self, test_case_id: str, runs: List[EvaluationResult]
    ) -> TestCaseResults:
        """Aggregate results for a test case across multiple runs.

        ``runs`` must be non-empty: the first run supplies the path used to
        reload the test case and read its category.
        """
        # Load test case to get category
        test_case_path = runs[0].test_case_path
        test_case = load_test_case(test_case_path)

        # Only runs that actually produced a value contribute to each statistic.
        tool_scores = [
            r.tool_match_score for r in runs if r.tool_match_score is not None
        ]
        response_scores = [
            r.response_match_score for r in runs if r.response_match_score is not None
        ]
        llm_scores = [r.llm_eval_score for r in runs if r.llm_eval_score is not None]
        duration_scores = [
            r.duration_seconds for r in runs if r.duration_seconds is not None
        ]

        return TestCaseResults(
            test_case_id=test_case_id,
            category=test_case["category"],
            runs=runs,
            average_duration=self.statistics_service.calculate_average_duration(
                duration_scores
            ),
            tool_match_scores=self.statistics_service.calculate_score_statistics(
                tool_scores
            ),
            response_match_scores=self.statistics_service.calculate_score_statistics(
                response_scores
            ),
            llm_eval_scores=self.statistics_service.calculate_score_statistics(
                llm_scores
            ),
        )
597
+
598
+
599
class ResultsWriter:
    """Persists evaluation results as JSON files."""

    def __init__(self):
        self.file_service = FileService()

    def write_model_results(self, model_results: ModelResults, base_results_path: str):
        """Write ``model_results`` to ``<base>/<model name>/results.json``."""
        model_dir = os.path.join(base_results_path, model_results.model_name)
        results_path = os.path.join(model_dir, "results.json")

        serialized = model_results.to_dict()
        self.file_service.save_json(serialized, results_path)

        logger.info(
            f"Results for model {model_results.model_name} written to {results_path}"
        )
614
+
615
+
616
class EvaluationOrchestrator:
    """Drives evaluation of every configured model and writes the results."""

    def __init__(self, config_path: str):
        self.config_service = ConfigurationService(config_path)
        self.results_writer = ResultsWriter()

    def run_evaluation(
        self,
        base_results_path: str,
        model_execution_times: Optional[Dict[str, float]] = None,
    ):
        """Evaluate each model listed in the configuration.

        ``model_execution_times`` optionally maps a model name to its total
        execution time; when an entry exists it is recorded on that model's
        results before they are written.
        """
        logger.info("--- Starting evaluation ---")

        timings = {} if model_execution_times is None else model_execution_times

        config = self.config_service.get_config()
        settings = self.config_service.get_evaluation_settings()
        evaluator = ModelEvaluator(config, settings)

        for entry in config["llm_models"]:
            name = entry["name"]
            results = evaluator.evaluate_model(name, base_results_path)

            # Only overwrite the placeholder when a timing was supplied.
            elapsed = timings.get(name)
            if elapsed is not None:
                results.total_execution_time = elapsed

            self.results_writer.write_model_results(results, base_results_path)

        logger.info("--- Evaluation finished ---")
656
+
657
+
658
def main(config_path: str = "evaluation/test_suite_config.json"):
    """Run the full evaluation using the configuration at ``config_path``.

    Results are read from and written to the path reported by the
    orchestrator's configuration service.
    """
    orchestrator = EvaluationOrchestrator(config_path)
    orchestrator.run_evaluation(orchestrator.config_service.get_results_path())


if __name__ == "__main__":
    # This will be updated later to parse CLI args.
    main()