solace-agent-mesh 0.2.4__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of solace-agent-mesh might be problematic. Click here for more details.

Files changed (518)
  1. solace_agent_mesh/agent/adk/adk_llm.txt +93 -0
  2. solace_agent_mesh/agent/adk/app_llm_agent.py +26 -0
  3. solace_agent_mesh/agent/adk/callbacks.py +1694 -0
  4. solace_agent_mesh/agent/adk/filesystem_artifact_service.py +381 -0
  5. solace_agent_mesh/agent/adk/invocation_monitor.py +295 -0
  6. solace_agent_mesh/agent/adk/models/lite_llm.py +872 -0
  7. solace_agent_mesh/agent/adk/models/models_llm.txt +94 -0
  8. solace_agent_mesh/agent/adk/runner.py +353 -0
  9. solace_agent_mesh/agent/adk/services.py +240 -0
  10. solace_agent_mesh/agent/adk/setup.py +751 -0
  11. solace_agent_mesh/agent/adk/stream_parser.py +214 -0
  12. solace_agent_mesh/agent/adk/tool_wrapper.py +139 -0
  13. solace_agent_mesh/agent/agent_llm.txt +41 -0
  14. solace_agent_mesh/agent/protocol/event_handlers.py +1469 -0
  15. solace_agent_mesh/agent/protocol/protocol_llm.txt +21 -0
  16. solace_agent_mesh/agent/sac/app.py +640 -0
  17. solace_agent_mesh/agent/sac/component.py +3388 -0
  18. solace_agent_mesh/agent/sac/patch_adk.py +111 -0
  19. solace_agent_mesh/agent/sac/sac_llm.txt +105 -0
  20. solace_agent_mesh/agent/sac/task_execution_context.py +176 -0
  21. solace_agent_mesh/agent/testing/__init__.py +3 -0
  22. solace_agent_mesh/agent/testing/debug_utils.py +135 -0
  23. solace_agent_mesh/agent/testing/testing_llm.txt +90 -0
  24. solace_agent_mesh/agent/tools/__init__.py +14 -0
  25. solace_agent_mesh/agent/tools/audio_tools.py +1622 -0
  26. solace_agent_mesh/agent/tools/builtin_artifact_tools.py +1954 -0
  27. solace_agent_mesh/agent/tools/builtin_data_analysis_tools.py +238 -0
  28. solace_agent_mesh/agent/tools/general_agent_tools.py +569 -0
  29. solace_agent_mesh/agent/tools/image_tools.py +1184 -0
  30. solace_agent_mesh/agent/tools/peer_agent_tool.py +289 -0
  31. solace_agent_mesh/agent/tools/registry.py +36 -0
  32. solace_agent_mesh/agent/tools/test_tools.py +135 -0
  33. solace_agent_mesh/agent/tools/tool_definition.py +45 -0
  34. solace_agent_mesh/agent/tools/tools_llm.txt +104 -0
  35. solace_agent_mesh/agent/tools/web_tools.py +381 -0
  36. solace_agent_mesh/agent/utils/artifact_helpers.py +927 -0
  37. solace_agent_mesh/agent/utils/config_parser.py +47 -0
  38. solace_agent_mesh/agent/utils/context_helpers.py +60 -0
  39. solace_agent_mesh/agent/utils/utils_llm.txt +153 -0
  40. solace_agent_mesh/assets/docs/404.html +16 -0
  41. solace_agent_mesh/assets/docs/assets/css/styles.906a1503.css +1 -0
  42. solace_agent_mesh/assets/docs/assets/images/Solace_AI_Framework_With_Broker-85f0a306a9bcdd20b390b7a949f6d862.png +0 -0
  43. solace_agent_mesh/assets/docs/assets/images/sac-flows-80d5b603c6aafd33e87945680ce0abf3.png +0 -0
  44. solace_agent_mesh/assets/docs/assets/images/sac_parts_of_a_component-cb3d0424b1d0c17734c5435cca6b4082.png +0 -0
  45. solace_agent_mesh/assets/docs/assets/js/04989206.674a8007.js +1 -0
  46. solace_agent_mesh/assets/docs/assets/js/0e682baa.79f0ab22.js +1 -0
  47. solace_agent_mesh/assets/docs/assets/js/1001.0182a8bd.js +1 -0
  48. solace_agent_mesh/assets/docs/assets/js/1023fc19.015679ca.js +1 -0
  49. solace_agent_mesh/assets/docs/assets/js/1039.0bd46aa1.js +1 -0
  50. solace_agent_mesh/assets/docs/assets/js/149.b797a808.js +1 -0
  51. solace_agent_mesh/assets/docs/assets/js/1523c6b4.91c7bc01.js +1 -0
  52. solace_agent_mesh/assets/docs/assets/js/165.6a39807d.js +2 -0
  53. solace_agent_mesh/assets/docs/assets/js/165.6a39807d.js.LICENSE.txt +9 -0
  54. solace_agent_mesh/assets/docs/assets/js/166ab619.7d97ccaf.js +1 -0
  55. solace_agent_mesh/assets/docs/assets/js/17896441.a5e82f9b.js +2 -0
  56. solace_agent_mesh/assets/docs/assets/js/17896441.a5e82f9b.js.LICENSE.txt +7 -0
  57. solace_agent_mesh/assets/docs/assets/js/1c6e87d2.23bccffb.js +1 -0
  58. solace_agent_mesh/assets/docs/assets/js/2130.ab9fd314.js +1 -0
  59. solace_agent_mesh/assets/docs/assets/js/21ceee5f.614fa8dd.js +1 -0
  60. solace_agent_mesh/assets/docs/assets/js/2237.5e477fc6.js +1 -0
  61. solace_agent_mesh/assets/docs/assets/js/2334.622a6395.js +1 -0
  62. solace_agent_mesh/assets/docs/assets/js/2a9cab12.8909df92.js +1 -0
  63. solace_agent_mesh/assets/docs/assets/js/3219.adc1d663.js +1 -0
  64. solace_agent_mesh/assets/docs/assets/js/332e10b5.7a103f42.js +1 -0
  65. solace_agent_mesh/assets/docs/assets/js/3624.b524e433.js +1 -0
  66. solace_agent_mesh/assets/docs/assets/js/375.708d48db.js +1 -0
  67. solace_agent_mesh/assets/docs/assets/js/3834.b6cd790e.js +1 -0
  68. solace_agent_mesh/assets/docs/assets/js/3d406171.f722eaf5.js +1 -0
  69. solace_agent_mesh/assets/docs/assets/js/4250.95455b28.js +1 -0
  70. solace_agent_mesh/assets/docs/assets/js/42b3f8d8.36090198.js +1 -0
  71. solace_agent_mesh/assets/docs/assets/js/4356.d169ab5b.js +1 -0
  72. solace_agent_mesh/assets/docs/assets/js/442a8107.5ba94b65.js +1 -0
  73. solace_agent_mesh/assets/docs/assets/js/4458.518e66fa.js +1 -0
  74. solace_agent_mesh/assets/docs/assets/js/4488.c7cc3442.js +1 -0
  75. solace_agent_mesh/assets/docs/assets/js/4494.6ee23046.js +1 -0
  76. solace_agent_mesh/assets/docs/assets/js/4855.fc4444b6.js +1 -0
  77. solace_agent_mesh/assets/docs/assets/js/4866.22daefc0.js +1 -0
  78. solace_agent_mesh/assets/docs/assets/js/4950.ca4caeda.js +1 -0
  79. solace_agent_mesh/assets/docs/assets/js/4c2787c2.66ee00e9.js +1 -0
  80. solace_agent_mesh/assets/docs/assets/js/5388.7a136447.js +1 -0
  81. solace_agent_mesh/assets/docs/assets/js/55f47984.c484bf96.js +1 -0
  82. solace_agent_mesh/assets/docs/assets/js/5607.081356f8.js +1 -0
  83. solace_agent_mesh/assets/docs/assets/js/5864.b0d0e9de.js +1 -0
  84. solace_agent_mesh/assets/docs/assets/js/5b4258a4.bda20761.js +1 -0
  85. solace_agent_mesh/assets/docs/assets/js/5e95c892.558d5167.js +1 -0
  86. solace_agent_mesh/assets/docs/assets/js/6143.0a1464c9.js +1 -0
  87. solace_agent_mesh/assets/docs/assets/js/6395.e9c73649.js +1 -0
  88. solace_agent_mesh/assets/docs/assets/js/6796.51d2c9b7.js +1 -0
  89. solace_agent_mesh/assets/docs/assets/js/6976.379be23b.js +1 -0
  90. solace_agent_mesh/assets/docs/assets/js/6978.ee0b945c.js +1 -0
  91. solace_agent_mesh/assets/docs/assets/js/7040.cb436723.js +1 -0
  92. solace_agent_mesh/assets/docs/assets/js/7195.412f418a.js +1 -0
  93. solace_agent_mesh/assets/docs/assets/js/7280.3fb73bdb.js +1 -0
  94. solace_agent_mesh/assets/docs/assets/js/768e31b0.a12673db.js +1 -0
  95. solace_agent_mesh/assets/docs/assets/js/7845.e33e7c4c.js +1 -0
  96. solace_agent_mesh/assets/docs/assets/js/7900.69516146.js +1 -0
  97. solace_agent_mesh/assets/docs/assets/js/8356.8a379c04.js +1 -0
  98. solace_agent_mesh/assets/docs/assets/js/85387663.6bf41934.js +1 -0
  99. solace_agent_mesh/assets/docs/assets/js/8567.4732c6b7.js +1 -0
  100. solace_agent_mesh/assets/docs/assets/js/8573.cb04eda5.js +1 -0
  101. solace_agent_mesh/assets/docs/assets/js/8577.1d54e766.js +1 -0
  102. solace_agent_mesh/assets/docs/assets/js/8591.d7c16be6.js +2 -0
  103. solace_agent_mesh/assets/docs/assets/js/8591.d7c16be6.js.LICENSE.txt +61 -0
  104. solace_agent_mesh/assets/docs/assets/js/8709.7ecd4047.js +1 -0
  105. solace_agent_mesh/assets/docs/assets/js/8731.49e930c2.js +1 -0
  106. solace_agent_mesh/assets/docs/assets/js/8908.f9d1b506.js +1 -0
  107. solace_agent_mesh/assets/docs/assets/js/9157.b4093d07.js +1 -0
  108. solace_agent_mesh/assets/docs/assets/js/9278.a4fd875d.js +1 -0
  109. solace_agent_mesh/assets/docs/assets/js/945fb41e.74d728aa.js +1 -0
  110. solace_agent_mesh/assets/docs/assets/js/9616.b75c2f6d.js +1 -0
  111. solace_agent_mesh/assets/docs/assets/js/9793.c6d16376.js +1 -0
  112. solace_agent_mesh/assets/docs/assets/js/9eff14a2.1bf8f61c.js +1 -0
  113. solace_agent_mesh/assets/docs/assets/js/a3a92b25.26ca071f.js +1 -0
  114. solace_agent_mesh/assets/docs/assets/js/a7bd4aaa.2204d2f7.js +1 -0
  115. solace_agent_mesh/assets/docs/assets/js/a94703ab.0438dbc2.js +1 -0
  116. solace_agent_mesh/assets/docs/assets/js/aba21aa0.c42a534c.js +1 -0
  117. solace_agent_mesh/assets/docs/assets/js/aba87c2f.d3e2dcc3.js +1 -0
  118. solace_agent_mesh/assets/docs/assets/js/ae4415af.8e279b5d.js +1 -0
  119. solace_agent_mesh/assets/docs/assets/js/b7006a3a.40b10c9d.js +1 -0
  120. solace_agent_mesh/assets/docs/assets/js/bac0be12.f50d9bac.js +1 -0
  121. solace_agent_mesh/assets/docs/assets/js/bb2ef573.207e6990.js +1 -0
  122. solace_agent_mesh/assets/docs/assets/js/c2c06897.63b76e9e.js +1 -0
  123. solace_agent_mesh/assets/docs/assets/js/cc969b05.954186d4.js +1 -0
  124. solace_agent_mesh/assets/docs/assets/js/cd3d4052.ca6eed8c.js +1 -0
  125. solace_agent_mesh/assets/docs/assets/js/ced92a13.fb92e7ca.js +1 -0
  126. solace_agent_mesh/assets/docs/assets/js/cee5d587.f5b73ca1.js +1 -0
  127. solace_agent_mesh/assets/docs/assets/js/f284c35a.ecc3d195.js +1 -0
  128. solace_agent_mesh/assets/docs/assets/js/f897a61a.2c2e152c.js +1 -0
  129. solace_agent_mesh/assets/docs/assets/js/fbfa3e75.aca209c9.js +1 -0
  130. solace_agent_mesh/assets/docs/assets/js/main.7ed3319f.js +2 -0
  131. solace_agent_mesh/assets/docs/assets/js/main.7ed3319f.js.LICENSE.txt +81 -0
  132. solace_agent_mesh/assets/docs/assets/js/runtime~main.d9520ae2.js +1 -0
  133. solace_agent_mesh/assets/docs/docs/documentation/concepts/agents/index.html +128 -0
  134. solace_agent_mesh/assets/docs/docs/documentation/concepts/architecture/index.html +91 -0
  135. solace_agent_mesh/assets/docs/docs/documentation/concepts/cli/index.html +201 -0
  136. solace_agent_mesh/assets/docs/docs/documentation/concepts/gateways/index.html +91 -0
  137. solace_agent_mesh/assets/docs/docs/documentation/concepts/orchestrator/index.html +55 -0
  138. solace_agent_mesh/assets/docs/docs/documentation/concepts/plugins/index.html +82 -0
  139. solace_agent_mesh/assets/docs/docs/documentation/deployment/debugging/index.html +60 -0
  140. solace_agent_mesh/assets/docs/docs/documentation/deployment/deploy/index.html +48 -0
  141. solace_agent_mesh/assets/docs/docs/documentation/deployment/observability/index.html +54 -0
  142. solace_agent_mesh/assets/docs/docs/documentation/enterprise/index.html +17 -0
  143. solace_agent_mesh/assets/docs/docs/documentation/getting-started/component-overview/index.html +45 -0
  144. solace_agent_mesh/assets/docs/docs/documentation/getting-started/installation/index.html +76 -0
  145. solace_agent_mesh/assets/docs/docs/documentation/getting-started/introduction/index.html +150 -0
  146. solace_agent_mesh/assets/docs/docs/documentation/getting-started/quick-start/index.html +54 -0
  147. solace_agent_mesh/assets/docs/docs/documentation/tutorials/bedrock-agents/index.html +267 -0
  148. solace_agent_mesh/assets/docs/docs/documentation/tutorials/custom-agent/index.html +136 -0
  149. solace_agent_mesh/assets/docs/docs/documentation/tutorials/event-mesh-gateway/index.html +116 -0
  150. solace_agent_mesh/assets/docs/docs/documentation/tutorials/mcp-integration/index.html +80 -0
  151. solace_agent_mesh/assets/docs/docs/documentation/tutorials/mongodb-integration/index.html +164 -0
  152. solace_agent_mesh/assets/docs/docs/documentation/tutorials/rest-gateway/index.html +57 -0
  153. solace_agent_mesh/assets/docs/docs/documentation/tutorials/slack-integration/index.html +72 -0
  154. solace_agent_mesh/assets/docs/docs/documentation/tutorials/sql-database/index.html +102 -0
  155. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/artifact-management/index.html +99 -0
  156. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/audio-tools/index.html +90 -0
  157. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/data-analysis-tools/index.html +107 -0
  158. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/embeds/index.html +152 -0
  159. solace_agent_mesh/assets/docs/docs/documentation/user-guide/builtin-tools/index.html +103 -0
  160. solace_agent_mesh/assets/docs/docs/documentation/user-guide/create-agents/index.html +170 -0
  161. solace_agent_mesh/assets/docs/docs/documentation/user-guide/create-gateways/index.html +200 -0
  162. solace_agent_mesh/assets/docs/docs/documentation/user-guide/creating-service-providers/index.html +54 -0
  163. solace_agent_mesh/assets/docs/docs/documentation/user-guide/solace-ai-connector/index.html +69 -0
  164. solace_agent_mesh/assets/docs/docs/documentation/user-guide/structure/index.html +59 -0
  165. solace_agent_mesh/assets/docs/img/Solace_AI_Framework_README.png +0 -0
  166. solace_agent_mesh/assets/docs/img/Solace_AI_Framework_With_Broker.png +0 -0
  167. solace_agent_mesh/assets/docs/img/logo.png +0 -0
  168. solace_agent_mesh/assets/docs/img/sac-flows.png +0 -0
  169. solace_agent_mesh/assets/docs/img/sac_parts_of_a_component.png +0 -0
  170. solace_agent_mesh/assets/docs/img/solace-logo.png +0 -0
  171. solace_agent_mesh/assets/docs/lunr-index-1753813536522.json +1 -0
  172. solace_agent_mesh/assets/docs/lunr-index.json +1 -0
  173. solace_agent_mesh/assets/docs/search-doc-1753813536522.json +1 -0
  174. solace_agent_mesh/assets/docs/search-doc.json +1 -0
  175. solace_agent_mesh/assets/docs/sitemap.xml +1 -0
  176. solace_agent_mesh/cli/__init__.py +1 -1
  177. solace_agent_mesh/cli/commands/add_cmd/__init__.py +15 -0
  178. solace_agent_mesh/cli/commands/add_cmd/add_cmd_llm.txt +250 -0
  179. solace_agent_mesh/cli/commands/add_cmd/agent_cmd.py +659 -0
  180. solace_agent_mesh/cli/commands/add_cmd/gateway_cmd.py +322 -0
  181. solace_agent_mesh/cli/commands/add_cmd/web_add_agent_step.py +93 -0
  182. solace_agent_mesh/cli/commands/add_cmd/web_add_gateway_step.py +118 -0
  183. solace_agent_mesh/cli/commands/docs_cmd.py +57 -0
  184. solace_agent_mesh/cli/commands/eval_cmd.py +64 -0
  185. solace_agent_mesh/cli/commands/init_cmd/__init__.py +404 -0
  186. solace_agent_mesh/cli/commands/init_cmd/broker_step.py +201 -0
  187. solace_agent_mesh/cli/commands/init_cmd/directory_step.py +28 -0
  188. solace_agent_mesh/cli/commands/init_cmd/env_step.py +197 -0
  189. solace_agent_mesh/cli/commands/init_cmd/init_cmd_llm.txt +365 -0
  190. solace_agent_mesh/cli/commands/init_cmd/orchestrator_step.py +387 -0
  191. solace_agent_mesh/cli/commands/init_cmd/project_files_step.py +38 -0
  192. solace_agent_mesh/cli/commands/init_cmd/web_init_step.py +110 -0
  193. solace_agent_mesh/cli/commands/init_cmd/webui_gateway_step.py +183 -0
  194. solace_agent_mesh/cli/commands/plugin_cmd/__init__.py +18 -0
  195. solace_agent_mesh/cli/commands/plugin_cmd/add_cmd.py +372 -0
  196. solace_agent_mesh/cli/commands/plugin_cmd/build_cmd.py +86 -0
  197. solace_agent_mesh/cli/commands/plugin_cmd/catalog_cmd.py +138 -0
  198. solace_agent_mesh/cli/commands/plugin_cmd/create_cmd.py +309 -0
  199. solace_agent_mesh/cli/commands/plugin_cmd/official_registry.py +174 -0
  200. solace_agent_mesh/cli/commands/plugin_cmd/plugin_cmd_llm.txt +305 -0
  201. solace_agent_mesh/cli/commands/run_cmd.py +158 -0
  202. solace_agent_mesh/cli/main.py +17 -294
  203. solace_agent_mesh/cli/utils.py +135 -204
  204. solace_agent_mesh/client/webui/frontend/static/assets/authCallback-DvlO62me.js +1 -0
  205. solace_agent_mesh/client/webui/frontend/static/assets/client-bp6u3qVZ.js +49 -0
  206. solace_agent_mesh/client/webui/frontend/static/assets/favicon-BLgzUch9.ico +0 -0
  207. solace_agent_mesh/client/webui/frontend/static/assets/main-An0a5j5k.js +663 -0
  208. solace_agent_mesh/client/webui/frontend/static/assets/main-Bu5-4Bac.css +1 -0
  209. solace_agent_mesh/client/webui/frontend/static/auth-callback.html +14 -0
  210. solace_agent_mesh/client/webui/frontend/static/index.html +15 -0
  211. solace_agent_mesh/common/__init__.py +1 -0
  212. solace_agent_mesh/common/a2a_protocol.py +564 -0
  213. solace_agent_mesh/common/agent_registry.py +42 -0
  214. solace_agent_mesh/common/client/__init__.py +4 -0
  215. solace_agent_mesh/common/client/card_resolver.py +21 -0
  216. solace_agent_mesh/common/client/client.py +85 -0
  217. solace_agent_mesh/common/client/client_llm.txt +133 -0
  218. solace_agent_mesh/common/common_llm.txt +144 -0
  219. solace_agent_mesh/common/constants.py +1 -14
  220. solace_agent_mesh/common/middleware/__init__.py +12 -0
  221. solace_agent_mesh/common/middleware/config_resolver.py +130 -0
  222. solace_agent_mesh/common/middleware/middleware_llm.txt +174 -0
  223. solace_agent_mesh/common/middleware/registry.py +125 -0
  224. solace_agent_mesh/common/server/__init__.py +4 -0
  225. solace_agent_mesh/common/server/server.py +122 -0
  226. solace_agent_mesh/common/server/server_llm.txt +169 -0
  227. solace_agent_mesh/common/server/task_manager.py +291 -0
  228. solace_agent_mesh/common/server/utils.py +28 -0
  229. solace_agent_mesh/common/services/__init__.py +4 -0
  230. solace_agent_mesh/common/services/employee_service.py +162 -0
  231. solace_agent_mesh/common/services/identity_service.py +129 -0
  232. solace_agent_mesh/common/services/providers/__init__.py +4 -0
  233. solace_agent_mesh/common/services/providers/local_file_identity_service.py +148 -0
  234. solace_agent_mesh/common/services/providers/providers_llm.txt +113 -0
  235. solace_agent_mesh/common/services/services_llm.txt +132 -0
  236. solace_agent_mesh/common/types.py +411 -0
  237. solace_agent_mesh/common/utils/__init__.py +7 -0
  238. solace_agent_mesh/common/utils/asyncio_macos_fix.py +86 -0
  239. solace_agent_mesh/common/utils/embeds/__init__.py +33 -0
  240. solace_agent_mesh/common/utils/embeds/constants.py +55 -0
  241. solace_agent_mesh/common/utils/embeds/converter.py +452 -0
  242. solace_agent_mesh/common/utils/embeds/embeds_llm.txt +124 -0
  243. solace_agent_mesh/common/utils/embeds/evaluators.py +394 -0
  244. solace_agent_mesh/common/utils/embeds/modifiers.py +816 -0
  245. solace_agent_mesh/common/utils/embeds/resolver.py +865 -0
  246. solace_agent_mesh/common/utils/embeds/types.py +14 -0
  247. solace_agent_mesh/common/utils/in_memory_cache.py +108 -0
  248. solace_agent_mesh/common/utils/log_formatters.py +44 -0
  249. solace_agent_mesh/common/utils/mime_helpers.py +106 -0
  250. solace_agent_mesh/common/utils/push_notification_auth.py +134 -0
  251. solace_agent_mesh/common/utils/utils_llm.txt +67 -0
  252. solace_agent_mesh/config_portal/backend/common.py +66 -24
  253. solace_agent_mesh/config_portal/backend/plugin_catalog/constants.py +23 -0
  254. solace_agent_mesh/config_portal/backend/plugin_catalog/models.py +49 -0
  255. solace_agent_mesh/config_portal/backend/plugin_catalog/registry_manager.py +160 -0
  256. solace_agent_mesh/config_portal/backend/plugin_catalog/scraper.py +525 -0
  257. solace_agent_mesh/config_portal/backend/plugin_catalog_server.py +216 -0
  258. solace_agent_mesh/config_portal/backend/server.py +550 -181
  259. solace_agent_mesh/config_portal/frontend/static/client/assets/_index-DNxCwAGB.js +48 -0
  260. solace_agent_mesh/config_portal/frontend/static/client/assets/components-B7lKcHVY.js +140 -0
  261. solace_agent_mesh/config_portal/frontend/static/client/assets/{entry.client-DX1misIU.js → entry.client-CEumGClk.js} +3 -3
  262. solace_agent_mesh/config_portal/frontend/static/client/assets/index-DSo1AH_7.js +68 -0
  263. solace_agent_mesh/config_portal/frontend/static/client/assets/manifest-d2b54a97.js +1 -0
  264. solace_agent_mesh/config_portal/frontend/static/client/assets/{root-BApq5dPK.js → root-C4XmHinv.js} +2 -2
  265. solace_agent_mesh/config_portal/frontend/static/client/assets/root-DxRwaWiE.css +1 -0
  266. solace_agent_mesh/config_portal/frontend/static/client/index.html +3 -3
  267. solace_agent_mesh/core_a2a/__init__.py +1 -0
  268. solace_agent_mesh/core_a2a/core_a2a_llm.txt +88 -0
  269. solace_agent_mesh/core_a2a/service.py +331 -0
  270. solace_agent_mesh/evaluation/config_loader.py +657 -0
  271. solace_agent_mesh/evaluation/evaluator.py +667 -0
  272. solace_agent_mesh/evaluation/message_organizer.py +568 -0
  273. solace_agent_mesh/evaluation/report/benchmark_info.html +35 -0
  274. solace_agent_mesh/evaluation/report/chart_section.html +141 -0
  275. solace_agent_mesh/evaluation/report/detailed_breakdown.html +28 -0
  276. solace_agent_mesh/evaluation/report/modal.html +59 -0
  277. solace_agent_mesh/evaluation/report/modal_chart_functions.js +411 -0
  278. solace_agent_mesh/evaluation/report/modal_script.js +296 -0
  279. solace_agent_mesh/evaluation/report/modal_styles.css +340 -0
  280. solace_agent_mesh/evaluation/report/performance_metrics_styles.css +93 -0
  281. solace_agent_mesh/evaluation/report/templates/footer.html +2 -0
  282. solace_agent_mesh/evaluation/report/templates/header.html +340 -0
  283. solace_agent_mesh/evaluation/report_data_processor.py +972 -0
  284. solace_agent_mesh/evaluation/report_generator.py +613 -0
  285. solace_agent_mesh/evaluation/run.py +613 -0
  286. solace_agent_mesh/evaluation/subscriber.py +872 -0
  287. solace_agent_mesh/evaluation/summary_builder.py +775 -0
  288. solace_agent_mesh/evaluation/test_case_loader.py +714 -0
  289. solace_agent_mesh/gateway/base/__init__.py +1 -0
  290. solace_agent_mesh/gateway/base/app.py +266 -0
  291. solace_agent_mesh/gateway/base/base_llm.txt +119 -0
  292. solace_agent_mesh/gateway/base/component.py +1542 -0
  293. solace_agent_mesh/gateway/base/task_context.py +74 -0
  294. solace_agent_mesh/gateway/gateway_llm.txt +125 -0
  295. solace_agent_mesh/gateway/http_sse/app.py +190 -0
  296. solace_agent_mesh/gateway/http_sse/component.py +1602 -0
  297. solace_agent_mesh/gateway/http_sse/components/__init__.py +7 -0
  298. solace_agent_mesh/gateway/http_sse/components/components_llm.txt +65 -0
  299. solace_agent_mesh/gateway/http_sse/components/visualization_forwarder_component.py +108 -0
  300. solace_agent_mesh/gateway/http_sse/dependencies.py +316 -0
  301. solace_agent_mesh/gateway/http_sse/http_sse_llm.txt +63 -0
  302. solace_agent_mesh/gateway/http_sse/main.py +442 -0
  303. solace_agent_mesh/gateway/http_sse/routers/__init__.py +4 -0
  304. solace_agent_mesh/gateway/http_sse/routers/agents.py +41 -0
  305. solace_agent_mesh/gateway/http_sse/routers/artifacts.py +821 -0
  306. solace_agent_mesh/gateway/http_sse/routers/auth.py +212 -0
  307. solace_agent_mesh/gateway/http_sse/routers/config.py +55 -0
  308. solace_agent_mesh/gateway/http_sse/routers/people.py +69 -0
  309. solace_agent_mesh/gateway/http_sse/routers/routers_llm.txt +37 -0
  310. solace_agent_mesh/gateway/http_sse/routers/sessions.py +80 -0
  311. solace_agent_mesh/gateway/http_sse/routers/sse.py +138 -0
  312. solace_agent_mesh/gateway/http_sse/routers/tasks.py +294 -0
  313. solace_agent_mesh/gateway/http_sse/routers/users.py +59 -0
  314. solace_agent_mesh/gateway/http_sse/routers/visualization.py +1131 -0
  315. solace_agent_mesh/gateway/http_sse/services/__init__.py +4 -0
  316. solace_agent_mesh/gateway/http_sse/services/agent_service.py +69 -0
  317. solace_agent_mesh/gateway/http_sse/services/people_service.py +158 -0
  318. solace_agent_mesh/gateway/http_sse/services/services_llm.txt +179 -0
  319. solace_agent_mesh/gateway/http_sse/services/task_service.py +121 -0
  320. solace_agent_mesh/gateway/http_sse/session_manager.py +187 -0
  321. solace_agent_mesh/gateway/http_sse/sse_manager.py +328 -0
  322. solace_agent_mesh/llm.txt +228 -0
  323. solace_agent_mesh/llm_detail.txt +2835 -0
  324. solace_agent_mesh/templates/agent_template.yaml +53 -0
  325. solace_agent_mesh/templates/eval_backend_template.yaml +54 -0
  326. solace_agent_mesh/templates/gateway_app_template.py +73 -0
  327. solace_agent_mesh/templates/gateway_component_template.py +400 -0
  328. solace_agent_mesh/templates/gateway_config_template.yaml +43 -0
  329. solace_agent_mesh/templates/main_orchestrator.yaml +55 -0
  330. solace_agent_mesh/templates/plugin_agent_config_template.yaml +122 -0
  331. solace_agent_mesh/templates/plugin_custom_config_template.yaml +27 -0
  332. solace_agent_mesh/templates/plugin_custom_template.py +10 -0
  333. solace_agent_mesh/templates/plugin_gateway_config_template.yaml +63 -0
  334. solace_agent_mesh/templates/plugin_pyproject_template.toml +33 -0
  335. solace_agent_mesh/templates/plugin_readme_template.md +34 -0
  336. solace_agent_mesh/templates/plugin_tools_template.py +224 -0
  337. solace_agent_mesh/templates/shared_config.yaml +66 -0
  338. solace_agent_mesh/templates/templates_llm.txt +147 -0
  339. solace_agent_mesh/templates/webui.yaml +53 -0
  340. solace_agent_mesh-1.0.1.dist-info/METADATA +432 -0
  341. solace_agent_mesh-1.0.1.dist-info/RECORD +359 -0
  342. solace_agent_mesh-1.0.1.dist-info/entry_points.txt +3 -0
  343. {solace_agent_mesh-0.2.4.dist-info → solace_agent_mesh-1.0.1.dist-info}/licenses/LICENSE +1 -1
  344. solace_agent_mesh/agents/base_agent_component.py +0 -256
  345. solace_agent_mesh/agents/global/actions/agent_state_change.py +0 -54
  346. solace_agent_mesh/agents/global/actions/clear_history.py +0 -32
  347. solace_agent_mesh/agents/global/actions/convert_file_to_markdown.py +0 -160
  348. solace_agent_mesh/agents/global/actions/create_file.py +0 -70
  349. solace_agent_mesh/agents/global/actions/error_action.py +0 -45
  350. solace_agent_mesh/agents/global/actions/plantuml_diagram.py +0 -163
  351. solace_agent_mesh/agents/global/actions/plotly_graph.py +0 -152
  352. solace_agent_mesh/agents/global/actions/retrieve_file.py +0 -51
  353. solace_agent_mesh/agents/global/global_agent_component.py +0 -38
  354. solace_agent_mesh/agents/image_processing/actions/create_image.py +0 -75
  355. solace_agent_mesh/agents/image_processing/actions/describe_image.py +0 -115
  356. solace_agent_mesh/agents/image_processing/image_processing_agent_component.py +0 -23
  357. solace_agent_mesh/agents/slack/__init__.py +0 -1
  358. solace_agent_mesh/agents/slack/actions/__init__.py +0 -1
  359. solace_agent_mesh/agents/slack/actions/post_message.py +0 -177
  360. solace_agent_mesh/agents/slack/slack_agent_component.py +0 -59
  361. solace_agent_mesh/agents/web_request/actions/do_image_search.py +0 -84
  362. solace_agent_mesh/agents/web_request/actions/do_news_search.py +0 -47
  363. solace_agent_mesh/agents/web_request/actions/do_suggestion_search.py +0 -34
  364. solace_agent_mesh/agents/web_request/actions/do_web_request.py +0 -135
  365. solace_agent_mesh/agents/web_request/actions/download_file.py +0 -69
  366. solace_agent_mesh/agents/web_request/web_request_agent_component.py +0 -33
  367. solace_agent_mesh/assets/web-visualizer/assets/index-D0qORgkg.css +0 -1
  368. solace_agent_mesh/assets/web-visualizer/assets/index-DnDr1pnu.js +0 -109
  369. solace_agent_mesh/assets/web-visualizer/index.html +0 -14
  370. solace_agent_mesh/assets/web-visualizer/vite.svg +0 -1
  371. solace_agent_mesh/cli/commands/add/__init__.py +0 -3
  372. solace_agent_mesh/cli/commands/add/add.py +0 -88
  373. solace_agent_mesh/cli/commands/add/agent.py +0 -110
  374. solace_agent_mesh/cli/commands/add/copy_from_plugin.py +0 -92
  375. solace_agent_mesh/cli/commands/add/gateway.py +0 -374
  376. solace_agent_mesh/cli/commands/build.py +0 -670
  377. solace_agent_mesh/cli/commands/chat/__init__.py +0 -3
  378. solace_agent_mesh/cli/commands/chat/chat.py +0 -361
  379. solace_agent_mesh/cli/commands/config.py +0 -29
  380. solace_agent_mesh/cli/commands/init/__init__.py +0 -3
  381. solace_agent_mesh/cli/commands/init/ai_provider_step.py +0 -93
  382. solace_agent_mesh/cli/commands/init/broker_step.py +0 -99
  383. solace_agent_mesh/cli/commands/init/builtin_agent_step.py +0 -83
  384. solace_agent_mesh/cli/commands/init/check_if_already_done.py +0 -13
  385. solace_agent_mesh/cli/commands/init/create_config_file_step.py +0 -65
  386. solace_agent_mesh/cli/commands/init/create_other_project_files_step.py +0 -147
  387. solace_agent_mesh/cli/commands/init/file_service_step.py +0 -73
  388. solace_agent_mesh/cli/commands/init/init.py +0 -92
  389. solace_agent_mesh/cli/commands/init/project_structure_step.py +0 -16
  390. solace_agent_mesh/cli/commands/init/web_init_step.py +0 -32
  391. solace_agent_mesh/cli/commands/plugin/__init__.py +0 -3
  392. solace_agent_mesh/cli/commands/plugin/add.py +0 -100
  393. solace_agent_mesh/cli/commands/plugin/build.py +0 -268
  394. solace_agent_mesh/cli/commands/plugin/create.py +0 -117
  395. solace_agent_mesh/cli/commands/plugin/plugin.py +0 -124
  396. solace_agent_mesh/cli/commands/plugin/remove.py +0 -73
  397. solace_agent_mesh/cli/commands/run.py +0 -68
  398. solace_agent_mesh/cli/commands/visualizer.py +0 -138
  399. solace_agent_mesh/cli/config.py +0 -85
  400. solace_agent_mesh/common/action.py +0 -91
  401. solace_agent_mesh/common/action_list.py +0 -37
  402. solace_agent_mesh/common/action_response.py +0 -340
  403. solace_agent_mesh/common/mysql_database.py +0 -40
  404. solace_agent_mesh/common/postgres_database.py +0 -85
  405. solace_agent_mesh/common/prompt_templates.py +0 -28
  406. solace_agent_mesh/common/stimulus_utils.py +0 -152
  407. solace_agent_mesh/common/time.py +0 -24
  408. solace_agent_mesh/common/utils.py +0 -712
  409. solace_agent_mesh/config_portal/frontend/static/client/assets/_index-a-zJ6rLx.js +0 -46
  410. solace_agent_mesh/config_portal/frontend/static/client/assets/components-ZIfdTbrV.js +0 -191
  411. solace_agent_mesh/config_portal/frontend/static/client/assets/index-BJHAE5s4.js +0 -17
  412. solace_agent_mesh/config_portal/frontend/static/client/assets/manifest-44c41103.js +0 -1
  413. solace_agent_mesh/config_portal/frontend/static/client/assets/root-DX4gQ516.css +0 -1
  414. solace_agent_mesh/configs/agent_global.yaml +0 -74
  415. solace_agent_mesh/configs/agent_image_processing.yaml +0 -82
  416. solace_agent_mesh/configs/agent_slack.yaml +0 -64
  417. solace_agent_mesh/configs/agent_web_request.yaml +0 -75
  418. solace_agent_mesh/configs/conversation_to_file.yaml +0 -56
  419. solace_agent_mesh/configs/error_catcher.yaml +0 -56
  420. solace_agent_mesh/configs/monitor.yaml +0 -0
  421. solace_agent_mesh/configs/monitor_stim_and_errors_to_slack.yaml +0 -109
  422. solace_agent_mesh/configs/monitor_user_feedback.yaml +0 -58
  423. solace_agent_mesh/configs/orchestrator.yaml +0 -241
  424. solace_agent_mesh/configs/service_embedding.yaml +0 -81
  425. solace_agent_mesh/configs/service_llm.yaml +0 -265
  426. solace_agent_mesh/configs/visualize_websocket.yaml +0 -55
  427. solace_agent_mesh/gateway/components/gateway_base.py +0 -47
  428. solace_agent_mesh/gateway/components/gateway_input.py +0 -278
  429. solace_agent_mesh/gateway/components/gateway_output.py +0 -298
  430. solace_agent_mesh/gateway/identity/bamboohr_identity.py +0 -18
  431. solace_agent_mesh/gateway/identity/identity_base.py +0 -10
  432. solace_agent_mesh/gateway/identity/identity_provider.py +0 -60
  433. solace_agent_mesh/gateway/identity/no_identity.py +0 -9
  434. solace_agent_mesh/gateway/identity/passthru_identity.py +0 -9
  435. solace_agent_mesh/monitors/base_monitor_component.py +0 -26
  436. solace_agent_mesh/monitors/feedback/user_feedback_monitor.py +0 -75
  437. solace_agent_mesh/monitors/stim_and_errors/stim_and_error_monitor.py +0 -560
  438. solace_agent_mesh/orchestrator/__init__.py +0 -0
  439. solace_agent_mesh/orchestrator/action_manager.py +0 -237
  440. solace_agent_mesh/orchestrator/components/__init__.py +0 -0
  441. solace_agent_mesh/orchestrator/components/orchestrator_action_manager_timeout_component.py +0 -58
  442. solace_agent_mesh/orchestrator/components/orchestrator_action_response_component.py +0 -179
  443. solace_agent_mesh/orchestrator/components/orchestrator_register_component.py +0 -107
  444. solace_agent_mesh/orchestrator/components/orchestrator_stimulus_processor_component.py +0 -527
  445. solace_agent_mesh/orchestrator/components/orchestrator_streaming_output_component.py +0 -260
  446. solace_agent_mesh/orchestrator/orchestrator_main.py +0 -172
  447. solace_agent_mesh/orchestrator/orchestrator_prompt.py +0 -539
  448. solace_agent_mesh/services/__init__.py +0 -0
  449. solace_agent_mesh/services/authorization/providers/base_authorization_provider.py +0 -56
  450. solace_agent_mesh/services/bamboo_hr_service/__init__.py +0 -3
  451. solace_agent_mesh/services/bamboo_hr_service/bamboo_hr.py +0 -182
  452. solace_agent_mesh/services/common/__init__.py +0 -4
  453. solace_agent_mesh/services/common/auto_expiry.py +0 -45
  454. solace_agent_mesh/services/common/singleton.py +0 -18
  455. solace_agent_mesh/services/file_service/__init__.py +0 -14
  456. solace_agent_mesh/services/file_service/file_manager/__init__.py +0 -0
  457. solace_agent_mesh/services/file_service/file_manager/bucket_file_manager.py +0 -149
  458. solace_agent_mesh/services/file_service/file_manager/file_manager_base.py +0 -162
  459. solace_agent_mesh/services/file_service/file_manager/memory_file_manager.py +0 -64
  460. solace_agent_mesh/services/file_service/file_manager/volume_file_manager.py +0 -106
  461. solace_agent_mesh/services/file_service/file_service.py +0 -437
  462. solace_agent_mesh/services/file_service/file_service_constants.py +0 -54
  463. solace_agent_mesh/services/file_service/file_transformations.py +0 -141
  464. solace_agent_mesh/services/file_service/file_utils.py +0 -324
  465. solace_agent_mesh/services/file_service/transformers/__init__.py +0 -5
  466. solace_agent_mesh/services/history_service/__init__.py +0 -3
  467. solace_agent_mesh/services/history_service/history_providers/__init__.py +0 -0
  468. solace_agent_mesh/services/history_service/history_providers/base_history_provider.py +0 -54
  469. solace_agent_mesh/services/history_service/history_providers/file_history_provider.py +0 -74
  470. solace_agent_mesh/services/history_service/history_providers/index.py +0 -40
  471. solace_agent_mesh/services/history_service/history_providers/memory_history_provider.py +0 -33
  472. solace_agent_mesh/services/history_service/history_providers/mongodb_history_provider.py +0 -66
  473. solace_agent_mesh/services/history_service/history_providers/redis_history_provider.py +0 -66
  474. solace_agent_mesh/services/history_service/history_providers/sql_history_provider.py +0 -93
  475. solace_agent_mesh/services/history_service/history_service.py +0 -413
  476. solace_agent_mesh/services/history_service/long_term_memory/__init__.py +0 -0
  477. solace_agent_mesh/services/history_service/long_term_memory/long_term_memory.py +0 -399
  478. solace_agent_mesh/services/llm_service/components/llm_request_component.py +0 -340
  479. solace_agent_mesh/services/llm_service/components/llm_service_component_base.py +0 -152
  480. solace_agent_mesh/services/middleware_service/__init__.py +0 -0
  481. solace_agent_mesh/services/middleware_service/middleware_service.py +0 -20
  482. solace_agent_mesh/templates/action.py +0 -38
  483. solace_agent_mesh/templates/agent.py +0 -29
  484. solace_agent_mesh/templates/agent.yaml +0 -70
  485. solace_agent_mesh/templates/gateway-config-template.yaml +0 -6
  486. solace_agent_mesh/templates/gateway-default-config.yaml +0 -28
  487. solace_agent_mesh/templates/gateway-flows.yaml +0 -78
  488. solace_agent_mesh/templates/gateway-header.yaml +0 -16
  489. solace_agent_mesh/templates/gateway_base.py +0 -15
  490. solace_agent_mesh/templates/gateway_input.py +0 -98
  491. solace_agent_mesh/templates/gateway_output.py +0 -71
  492. solace_agent_mesh/templates/plugin-gateway-default-config.yaml +0 -29
  493. solace_agent_mesh/templates/plugin-pyproject.toml +0 -30
  494. solace_agent_mesh/templates/rest-api-default-config.yaml +0 -31
  495. solace_agent_mesh/templates/rest-api-flows.yaml +0 -81
  496. solace_agent_mesh/templates/slack-default-config.yaml +0 -16
  497. solace_agent_mesh/templates/slack-flows.yaml +0 -81
  498. solace_agent_mesh/templates/solace-agent-mesh-default.yaml +0 -86
  499. solace_agent_mesh/templates/solace-agent-mesh-plugin-default.yaml +0 -8
  500. solace_agent_mesh/templates/web-default-config.yaml +0 -10
  501. solace_agent_mesh/templates/web-flows.yaml +0 -76
  502. solace_agent_mesh/tools/__init__.py +0 -0
  503. solace_agent_mesh/tools/components/__init__.py +0 -0
  504. solace_agent_mesh/tools/components/conversation_formatter.py +0 -111
  505. solace_agent_mesh/tools/components/file_resolver_component.py +0 -58
  506. solace_agent_mesh/tools/config/runtime_config.py +0 -26
  507. solace_agent_mesh-0.2.4.dist-info/METADATA +0 -176
  508. solace_agent_mesh-0.2.4.dist-info/RECORD +0 -193
  509. solace_agent_mesh-0.2.4.dist-info/entry_points.txt +0 -3
  510. /solace_agent_mesh/{agents → agent}/__init__.py +0 -0
  511. /solace_agent_mesh/{agents/global → agent/adk}/__init__.py +0 -0
  512. /solace_agent_mesh/{agents/global/actions → agent/protocol}/__init__.py +0 -0
  513. /solace_agent_mesh/{agents/image_processing → agent/sac}/__init__.py +0 -0
  514. /solace_agent_mesh/{agents/image_processing/actions → agent/utils}/__init__.py +0 -0
  515. /solace_agent_mesh/{agents/web_request → config_portal/backend/plugin_catalog}/__init__.py +0 -0
  516. /solace_agent_mesh/{agents/web_request/actions → evaluation}/__init__.py +0 -0
  517. /solace_agent_mesh/gateway/{components → http_sse}/__init__.py +0 -0
  518. {solace_agent_mesh-0.2.4.dist-info → solace_agent_mesh-1.0.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,972 @@
1
+ """
2
+ Refactored report data processor with improved structure and readability.
3
+ This module extracts and processes evaluation data for HTML report generation.
4
+ """
5
+
6
+ import json
7
+ import logging
8
+ from datetime import datetime
9
+ from dataclasses import dataclass, field
10
+ from typing import Dict, List, Optional, Any, Set, Tuple
11
+ from pathlib import Path
12
+ from collections import defaultdict, Counter
13
+ import random
14
+
15
+ # Import test case loader
16
+ from .test_case_loader import load_test_case
17
+
18
# Set up logging.
# NOTE(review): basicConfig() runs at import time, so merely importing this
# module configures the root logger at INFO level when no handlers are
# installed yet. Confirm this is intended for a library module.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the service classes defined below.
logger = logging.getLogger(__name__)
21
+
22
+
23
@dataclass
class EvaluationMetrics:
    """Core evaluation data structure.

    Holds the summary-level facts about one evaluation run. Every field has a
    default, so an empty instance is a valid "nothing found" result.
    """

    # Names of the models for which results were loaded.
    models: List[str] = field(default_factory=list)
    # Total execution time as read from the run statistics; None when the
    # statistics are unavailable.
    total_execution_time: Optional[float] = None
    # Short human-readable form of the total time, e.g. "2m 5s".
    total_execution_time_formatted: str = "Not available"
    # Creation time of this object (naive local time), "YYYY-MM-DD HH:MM:SS".
    generation_time: str = field(
        default_factory=lambda: datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    )
    # Creation time of this object in a display format, e.g.
    # "January 05, 2025 at 03:07 PM".
    timestamp: str = field(
        default_factory=lambda: datetime.now().strftime("%B %d, %Y at %I:%M %p")
    )
    # Description of the run count, e.g. "3 runs per test case".
    runs: str = "Not available"
    # Number of distinct test case ids seen across all models.
    total_tests: int = 0
    # Long human-readable form of the total time, e.g. "2 minutes 5 seconds".
    duration: str = "Not available"
    # Test case names; presumably the distinct test case ids (populated by a
    # helper outside the reviewed portion of the file; confirm).
    test_case_names: List[str] = field(default_factory=list)
40
+
41
+
42
@dataclass
class ModelPerformance:
    """Individual model performance data.

    Aggregated over every run of every test case recorded for one model.
    """

    # Name of the model these figures belong to.
    model_name: str
    # Arithmetic mean of `scores`; stays 0.0 when no valid score was found.
    average_score: float = 0.0
    # Percentage (0-100) of `scores` that are >= 0.7.
    success_rate: float = 0.0
    # Number of run records counted for this model (one per run, not per
    # test case).
    test_count: int = 0
    # Rough cost estimate computed as test_count * 0.02.
    estimated_cost: float = 0.0
    # Every valid numeric score that contributed to the aggregates.
    scores: List[float] = field(default_factory=list)
52
+
53
+
54
@dataclass
class TestCaseResult:
    """Test case specific results.

    One instance per test case id, combining the data of all models.
    """

    # Identifier of the test case.
    test_case_id: str
    # Category used to group test cases in the report.
    category: str
    # Human-readable description of the test case.
    description: str = ""
    # Nested mapping {test_case_id: {model_name: raw test-case record}} as
    # built by ModelResultsProcessor.create_test_case_results.
    model_results: Dict[str, Any] = field(default_factory=dict)
    # Mean score over all models and runs; 0 when no valid score exists.
    average_score: float = 0.0
63
+
64
+
65
@dataclass
class ChartConfiguration:
    """Chart and visualization data.

    All fields stay empty when no category scores could be computed.
    """

    # Category names in sorted order; they index the `data` list of each dataset.
    categories: List[str] = field(default_factory=list)
    # One dict per model with label, data, colour and border settings.
    datasets: List[Dict[str, Any]] = field(default_factory=list)
    # Mapping {category: {model_name: average score}}.
    category_scores: Dict[str, Dict[str, float]] = field(default_factory=dict)
72
+
73
+
74
@dataclass
class CategoryStatistics:
    """Category-based statistics.

    NOTE(review): not referenced anywhere in the reviewed portion of this
    file; confirm whether it is still needed.
    """

    # Name of the category.
    category_name: str
    # Presumably the ids of the test cases in this category (confirm).
    test_cases: List[str] = field(default_factory=list)
    # Presumably {model_name: score} for this category (confirm).
    model_scores: Dict[str, float] = field(default_factory=dict)
81
+
82
+
83
class FileService:
    """Handles file I/O operations with proper error handling.

    Every helper is a static method; failures are logged and reported through
    the return value instead of being raised to the caller.
    """

    @staticmethod
    def load_json(filepath: Path) -> Any:
        """Load JSON data from file.

        The file is always decoded as UTF-8 (the standard JSON interchange
        encoding) rather than with the platform's locale encoding, so results
        containing non-ASCII text load identically on every operating system.

        Args:
            filepath: Location of the JSON document.

        Returns:
            The decoded JSON value, or ``None`` when the file is missing,
            contains invalid JSON, or cannot be read for any other reason.
        """
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            logger.warning(f"File not found: {filepath}")
            return None
        except json.JSONDecodeError as e:
            logger.error(f"Invalid JSON in file {filepath}: {e}")
            return None
        except Exception as e:
            # Deliberate best-effort boundary: permission, decoding and other
            # read errors are logged and turned into "no data".
            logger.error(f"Error reading file {filepath}: {e}")
            return None

    @staticmethod
    def file_exists(filepath: Path) -> bool:
        """Return True only when ``filepath`` exists and is a regular file."""
        return filepath.exists() and filepath.is_file()

    @staticmethod
    def list_directories(path: Path) -> List[str]:
        """List directories in the given path.

        Args:
            path: Directory whose immediate children are inspected.

        Returns:
            Names of the immediate sub-directories whose name does not start
            with a dot, in the order the filesystem yields them. An empty
            list is returned when the directory cannot be listed.
        """
        try:
            return [
                item.name
                for item in path.iterdir()
                if item.is_dir() and not item.name.startswith(".")
            ]
        except Exception as e:
            logger.error(f"Error listing directories in {path}: {e}")
            return []
119
+
120
+
121
class ResultsExtractionService:
    """Reads the raw JSON artefacts of an evaluation run from disk.

    All filesystem access goes through the injected file service.
    """

    def __init__(self, file_service: FileService):
        self.file_service = file_service

    def extract_model_results(self, results_dir: Path) -> Dict[str, Any]:
        """Collect the ``results.json`` payload of every model directory.

        Each sub-directory of ``results_dir`` is treated as one model.
        Directories without a ``results.json`` file, or whose file yields an
        empty/unreadable payload, are left out.

        Returns:
            Mapping of model (directory) name to its decoded results.
        """
        collected: Dict[str, Any] = {}

        for model_name in self.file_service.list_directories(results_dir):
            results_file = results_dir / model_name / "results.json"
            if not self.file_service.file_exists(results_file):
                continue

            payload = self.file_service.load_json(results_file)
            if not payload:
                continue

            collected[model_name] = payload
            logger.debug(f"Loaded results for model: {model_name}")

        logger.info(f"Extracted results for {len(collected)} models")
        return collected

    def extract_execution_stats(self, results_dir: Path) -> Optional[Dict[str, Any]]:
        """Return the decoded ``stats.json`` of the run, if there is one.

        Returns:
            The statistics payload, or ``None`` (after logging a warning)
            when the file is absent or yields an empty/unreadable payload.
        """
        stats_file = results_dir / "stats.json"

        stats = (
            self.file_service.load_json(stats_file)
            if self.file_service.file_exists(stats_file)
            else None
        )
        if stats:
            logger.debug("Loaded execution statistics")
            return stats

        logger.warning("No execution statistics found")
        return None
156
+
157
+
158
class MetricsCalculationService:
    """Stateless helpers that turn raw result payloads into summary numbers."""

    @staticmethod
    def calculate_model_performance(
        model_name: str, results_data: Dict[str, Any]
    ) -> ModelPerformance:
        """Aggregate every run of one model into a ``ModelPerformance``.

        Args:
            model_name: Name stored on the returned object.
            results_data: Decoded results payload; only its ``test_cases``
                list (each entry carrying a ``runs`` list) is inspected.

        Returns:
            Performance record. Score-based fields keep their defaults when
            no run yields a numeric score; the run count and cost estimate
            are always filled in.
        """
        performance = ModelPerformance(model_name=model_name)

        valid_scores = []
        run_total = 0

        cases = results_data["test_cases"] if "test_cases" in results_data else ()
        for case in cases:
            if "runs" not in case:
                continue
            for run in case["runs"]:
                if not isinstance(run, dict):
                    continue

                # The LLM evaluation score wins; response_match is only the
                # fallback when no LLM score was recorded.
                value = run.get("llm_eval", {}).get("score")
                if value is None:
                    value = run.get("response_match", 0)

                # isinstance() also rejects a remaining None.
                if isinstance(value, (int, float)):
                    valid_scores.append(value)

                # Every dict-shaped run counts, scored or not.
                run_total += 1

        if valid_scores:
            passing = len([s for s in valid_scores if s >= 0.7])
            performance.scores = valid_scores
            performance.average_score = sum(valid_scores) / len(valid_scores)
            performance.success_rate = passing / len(valid_scores) * 100

        performance.test_count = run_total
        performance.estimated_cost = run_total * 0.02  # Rough estimate

        return performance

    @staticmethod
    def format_execution_time(total_time: float) -> Tuple[str, str]:
        """Render a duration in seconds as whole minutes and seconds.

        Returns:
            A ``(short, long)`` pair such as
            ``("2m 5s", "2 minutes 5 seconds")``; fractions are truncated.
        """
        whole_minutes = int(total_time // 60)
        whole_seconds = int(total_time % 60)
        return (
            f"{whole_minutes}m {whole_seconds}s",
            f"{whole_minutes} minutes {whole_seconds} seconds",
        )

    @staticmethod
    def calculate_run_statistics(model_results: Dict[str, Any]) -> Tuple[int, str]:
        """Count distinct test cases and describe the typical run count.

        Returns:
            ``(total_tests, runs_description)`` where ``total_tests`` is the
            number of distinct test case ids across all models, and
            ``runs_description`` names the most common number of distinct
            run numbers per test case ("Not available" if none were found).
        """
        seen_ids = set()
        runs_per_case = []

        for results in model_results.values():
            if "test_cases" not in results:
                continue
            for case in results["test_cases"]:
                case_id = case.get("test_case_id")
                if case_id:
                    seen_ids.add(case_id)

                if "runs" not in case:
                    continue
                # Repeated run numbers inside one test case count once.
                distinct_runs = {
                    run["run"]
                    for run in case["runs"]
                    if isinstance(run, dict) and "run" in run
                }
                if distinct_runs:
                    runs_per_case.append(len(distinct_runs))

        if not runs_per_case:
            return len(seen_ids), "Not available"

        typical = Counter(runs_per_case).most_common(1)[0][0]
        plural = "" if typical == 1 else "s"
        return len(seen_ids), f"{typical} run{plural} per test case"
242
+
243
+
244
class ChartDataService:
    """Generates chart and visualization data.

    Produces the per-category, per-model average scores and the matching
    dataset dictionaries consumed by the report's bar chart.
    """

    # Fixed colours for well-known models; any other model gets a colour
    # derived from its name (see _fallback_color).
    _MODEL_COLORS = {
        "gpt-4": "#059669",
        "gpt-4-1": "#10b981",
        "claude-3-sonnet": "#0ea5e9",
        "gemini-pro": "#f59e0b",
        "gemini-2.5-pro": "#f59e0b",
        "gemini-flash": "#8b5cf6",
        "gemini-2.5-flash": "#a855f7",
        "gpt-3.5-turbo": "#ef4444",
        "claude-3-haiku": "#84cc16",
    }

    def __init__(self, file_service: FileService):
        self.file_service = file_service

    def generate_chart_configuration(
        self, model_results: Dict[str, Any], test_cases: Dict[str, Dict[str, Any]]
    ) -> ChartConfiguration:
        """Generate chart configuration data.

        Args:
            model_results: Mapping of model name to its raw results payload.
            test_cases: Mapping ``{test_case_id: {model_name: test record}}``.

        Returns:
            A ``ChartConfiguration``; it is left empty when no test case in
            ``model_results`` carries a category.
        """
        chart_config = ChartConfiguration()

        # Extract categories and organize test cases
        category_test_mapping = self._extract_category_mapping(model_results)

        # Calculate category scores for each model
        category_scores = self._calculate_category_scores(
            category_test_mapping, test_cases, model_results
        )

        # Prepare chart data
        if category_scores:
            chart_config.categories = sorted(category_scores)
            chart_config.category_scores = category_scores
            chart_config.datasets = self._generate_chart_datasets(
                category_scores, model_results
            )

        return chart_config

    def _extract_category_mapping(
        self, model_results: Dict[str, Any]
    ) -> Dict[str, List[str]]:
        """Extract category to test case mapping.

        Returns:
            Mapping of category name to the sorted list of distinct test case
            ids that declare it. Test cases lacking an id or a category are
            ignored.
        """
        category_test_mapping = defaultdict(set)

        for results in model_results.values():
            if "test_cases" not in results:
                continue
            for test_case in results["test_cases"]:
                test_id = test_case.get("test_case_id")
                category = test_case.get("category")
                if test_id and category:
                    category_test_mapping[category].add(test_id)

        # Sets de-duplicate ids reported by several models; callers get
        # deterministic sorted lists.
        return {cat: sorted(tests) for cat, tests in category_test_mapping.items()}

    @staticmethod
    def _run_score(run: Dict[str, Any]) -> Optional[float]:
        """Pick the score of one run: numeric llm_eval score, else response_match.

        Returns ``None`` when neither value is a number.
        """
        score = run.get("llm_eval", {}).get("score")
        if isinstance(score, (int, float)):
            return score
        score = run.get("response_match", 0)
        if isinstance(score, (int, float)):
            return score
        return None

    def _calculate_category_scores(
        self,
        category_test_mapping: Dict[str, List[str]],
        test_cases: Dict[str, Dict[str, Any]],
        model_results: Dict[str, Any],
    ) -> Dict[str, Dict[str, float]]:
        """Calculate average scores by category for each model.

        Returns:
            ``{category: {model_name: average}}`` with an entry for every
            model in ``model_results``; the average is 0 when the model has
            no scored run in that category.
        """
        category_scores = {}

        for category, test_names in category_test_mapping.items():
            per_model = {}

            for model_name in model_results:
                scores = []

                for test_name in test_names:
                    test_data = test_cases.get(test_name, {}).get(model_name)
                    if not (isinstance(test_data, dict) and "runs" in test_data):
                        continue
                    for run in test_data["runs"]:
                        if not isinstance(run, dict):
                            continue
                        score = self._run_score(run)
                        if score is not None:
                            scores.append(score)

                per_model[model_name] = sum(scores) / len(scores) if scores else 0

            category_scores[category] = per_model

        return category_scores

    @staticmethod
    def _fallback_color(model_name: str) -> str:
        """Return a ``#rrggbb`` colour derived from the model name.

        Seeding a private generator with the name keeps the colour stable
        across report generations, unlike drawing from the shared global
        random state.
        """
        return f"#{random.Random(model_name).randrange(0x1000000):06x}"

    def _generate_chart_datasets(
        self,
        category_scores: Dict[str, Dict[str, float]],
        model_results: Dict[str, Any],
    ) -> List[Dict[str, Any]]:
        """Generate chart datasets for visualization.

        Returns:
            One dataset dict per model, ordered by model name. Each ``data``
            list holds the scores rounded to three decimals, in sorted
            category order.
        """
        categories = sorted(category_scores)
        chart_datasets = []

        for model_name in sorted(model_results):
            model_data = [
                round(category_scores[category].get(model_name, 0), 3)
                for category in categories
            ]

            color = self._MODEL_COLORS.get(model_name)
            if color is None:
                color = self._fallback_color(model_name)

            chart_datasets.append(
                {
                    "label": model_name,
                    "data": model_data,
                    "backgroundColor": color,
                    "borderColor": color,
                    "borderWidth": 1,
                    "borderRadius": 4,
                    "borderSkipped": False,
                }
            )

        return chart_datasets
382
+
383
+
384
class ModalDataService:
    """Builds the per-test-case payload shown in the report's detail modal."""

    def __init__(self, file_service: FileService):
        self.file_service = file_service

    def generate_modal_test_data(
        self, test_case_id: str, model_results: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Collect the scores and run details of one test case for every model.

        Args:
            test_case_id: Id of the test case to look up.
            model_results: Mapping of model name to its raw results payload.

        Returns:
            Dict with three model-keyed mappings: ``model_scores`` (mean
            ``response_match``), ``tool_scores`` (mean ``tool_match``) and
            ``individual_runs`` (one detail dict per run). A model appears
            only if at least one of its runs has a non-None response score.
        """
        modal_data = {"model_scores": {}, "tool_scores": {}, "individual_runs": {}}

        for model_name, results in model_results.items():
            if "test_cases" not in results:
                continue

            for test_case in results["test_cases"]:
                if test_case.get("test_case_id") != test_case_id:
                    continue

                runs = test_case.get("runs", [])
                if not runs:
                    continue

                response_values = []
                tool_values = []
                run_details = []

                for run in runs:
                    if not isinstance(run, dict):
                        continue

                    llm_eval = run.get("llm_eval", {})
                    # The response score is the raw response_match value; the
                    # LLM evaluation score is reported separately per run.
                    response_value = run.get("response_match", 0)
                    tool_value = run.get("tool_match", 1.0)

                    if response_value is not None:
                        response_values.append(response_value)
                        tool_values.append(tool_value)

                    run_details.append(
                        {
                            "run_number": run.get("run", 1),
                            "response_score": response_value,
                            "tool_score": tool_value,
                            "llm_eval": llm_eval.get("score"),
                            "llm_reasoning": llm_eval.get(
                                "reasoning", "No reasoning provided"
                            ),
                            "execution_time": run.get("duration_seconds", 0),
                            "query": "",
                            "actual_response": "",
                            "expected_response": "",
                        }
                    )

                if not response_values:
                    continue

                # Both lists grow together, so they share one length.
                modal_data["model_scores"][model_name] = sum(response_values) / len(
                    response_values
                )
                modal_data["tool_scores"][model_name] = sum(tool_values) / len(
                    tool_values
                )
                modal_data["individual_runs"][model_name] = run_details

        return modal_data
457
+
458
+
459
class TemplateDataService:
    """Formats data for template consumption.

    Every method returns either a ready-to-embed HTML fragment or a short
    display string. All dynamic text placed into HTML is escaped, so test
    descriptions, model names or LLM reasoning containing quotes, ``&`` or
    angle brackets cannot break the generated markup.
    """

    # Accent colours for the execution-time cards; unknown models use
    # _DEFAULT_MODEL_COLOR.
    _MODEL_COLORS = {
        "gpt-4": "#059669",
        "gpt-4-1": "#10b981",
        "claude-3-sonnet": "#0ea5e9",
        "gemini-pro": "#f59e0b",
        "gemini-2.5-pro": "#f59e0b",
        "gemini-flash": "#8b5cf6",
        "gemini-2.5-flash": "#a855f7",
        "gpt-3.5-turbo": "#ef4444",
        "claude-3-haiku": "#84cc16",
    }
    _DEFAULT_MODEL_COLOR = "#6b7280"

    def __init__(self, file_service: FileService):
        self.file_service = file_service
        self.modal_service = ModalDataService(file_service)

    @staticmethod
    def _escape(value: Any) -> str:
        """Escape ``value`` for use in HTML text or a quoted attribute."""
        from html import escape

        return escape(str(value), quote=True)

    @staticmethod
    def _run_score(run: Dict[str, Any]) -> Optional[float]:
        """Pick the score of one run: numeric llm_eval score, else response_match.

        Returns ``None`` when neither value is a number.
        """
        score = run.get("llm_eval", {}).get("score")
        if isinstance(score, (int, float)):
            return score
        score = run.get("response_match", 0)
        if isinstance(score, (int, float)):
            return score
        return None

    def generate_performance_metrics_table(
        self, model_performances: Dict[str, ModelPerformance]
    ) -> str:
        """Generate HTML table rows for performance metrics.

        Models without any recorded score are omitted.

        Returns:
            Concatenated ``<tr>`` fragments (empty string if there are none).
        """
        metrics_rows = []

        for model_name, performance in model_performances.items():
            if not performance.scores:
                continue

            score_class = self._get_score_class(performance.average_score)
            metrics_rows.append(
                f"""
                <tr>
                    <td class="model-name">{self._escape(model_name)}</td>
                    <td class="metric-value {score_class}">{performance.average_score:.2f}</td>
                    <td class="metric-value {score_class}">{performance.success_rate:.0f}%</td>
                    <td class="metric-value">{performance.test_count}</td>
                    <td class="estimated-cost">${performance.estimated_cost:.2f}</td>
                </tr>
                """
            )

        return "".join(metrics_rows)

    def _render_model_result(self, model_name: str, model_data: Any) -> str:
        """Render one model's averaged result for a test case ("" if unscored)."""
        if not (isinstance(model_data, dict) and "runs" in model_data):
            return ""

        scores = []
        durations = []
        for run in model_data["runs"]:
            if not isinstance(run, dict):
                continue
            score = self._run_score(run)
            if score is not None:
                scores.append(score)
            duration = run.get("duration_seconds", 0)
            if isinstance(duration, (int, float)):
                durations.append(duration)

        if not scores:
            return ""

        avg_score = sum(scores) / len(scores)
        avg_duration = sum(durations) / len(durations) if durations else 0
        score_class = self._get_score_class(avg_score)

        return f"""
                <div class="model-result {score_class}">
                    <span class="model-score">{self._escape(model_name)}</span>
                    <span class="score-value">LLM Eval: {avg_score:.3f}</span>
                    <span class="avg-duration">Avg time: {avg_duration:.1f}s</span>
                </div>
                """

    def _render_test_item(
        self,
        test_result: TestCaseResult,
        model_performances: Dict[str, ModelPerformance],
        model_results: Optional[Dict[str, Any]],
    ) -> str:
        """Render one test case with all its model results ("" if none scored)."""
        if test_result.test_case_id not in test_result.model_results:
            return ""
        models_data = test_result.model_results[test_result.test_case_id]

        test_scores = []
        for model_name in model_performances:
            rendered = self._render_model_result(
                model_name, models_data.get(model_name, {})
            )
            if rendered:
                test_scores.append(rendered)

        if not test_scores:
            return ""

        # Generate modal data for this test case
        modal_data = {}
        if model_results:
            modal_data = self.modal_service.generate_modal_test_data(
                test_result.test_case_id, model_results
            )

        # Full HTML escaping: the browser decodes the attribute back to the
        # exact JSON text, even if the JSON itself contains "&" or "<".
        modal_data_json = self._escape(json.dumps(modal_data))
        test_name = self._escape(test_result.test_case_id)
        test_description = self._escape(test_result.description)

        return f"""
            <div class="test-item"
                 data-test-name="{test_name}"
                 data-test-description="{test_description}"
                 data-test-data="{modal_data_json}">
                <div class="test-header">
                    <span class="test-name">{test_name}</span>
                    <span class="test-description">{test_description}</span>
                </div>
                <div class="model-results">
                    {''.join(test_scores)}
                </div>
            </div>
            """

    def generate_breakdown_content(
        self,
        test_case_results: List[TestCaseResult],
        model_performances: Dict[str, ModelPerformance],
        model_results: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Generate detailed breakdown content by category with modal support.

        Args:
            test_case_results: Test cases to render, grouped here by category.
            model_performances: Its keys decide which models are rendered
                and in which order.
            model_results: Raw results per model. When given, each test item
                carries modal data in its ``data-test-data`` attribute;
                otherwise that attribute holds an empty JSON object.

        Returns:
            Concatenated category sections, sorted by category name. Test
            cases without any scored model, and categories left empty as a
            result, are omitted.
        """
        # Group test cases by category
        categories_with_tests = defaultdict(list)
        for test_result in test_case_results:
            categories_with_tests[test_result.category].append(test_result)

        breakdown_sections = []

        for category in sorted(categories_with_tests):
            category_tests = []
            for test_result in categories_with_tests[category]:
                rendered = self._render_test_item(
                    test_result, model_performances, model_results
                )
                if rendered:
                    category_tests.append(rendered)

            if not category_tests:
                continue

            test_count = len(category_tests)
            breakdown_sections.append(
                f"""
                <div class="category-section">
                    <div class="category-header">
                        <h3 class="category-title">{self._escape(category)} ({test_count} test case{'s' if test_count != 1 else ''})</h3>
                        <span class="category-toggle">▶</span>
                    </div>
                    <div class="category-content">
                        {''.join(category_tests)}
                    </div>
                </div>
                """
            )

        return "".join(breakdown_sections)

    def generate_model_execution_times(self, model_results: Dict[str, Any]) -> str:
        """Generate model execution times HTML.

        Returns:
            One card per model (sorted by name) whose results payload has a
            non-None ``total_execution_time``; other models are skipped.
        """
        execution_times_html = []

        for model_name in sorted(model_results):
            execution_time = model_results[model_name].get("total_execution_time")
            if execution_time is None:
                continue

            # Format time as minutes and seconds
            minutes = int(execution_time // 60)
            seconds = int(execution_time % 60)
            time_formatted = f"{minutes}m {seconds}s"

            color = self._MODEL_COLORS.get(model_name, self._DEFAULT_MODEL_COLOR)
            execution_times_html.append(
                f"""
                <div style="background: #f9fafb; padding: 15px; border-radius: 8px; border-left: 4px solid {color}; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
                    <div style="font-weight: 600; color: #1f2937; margin-bottom: 5px; font-size: 1rem;">{self._escape(model_name)}</div>
                    <div style="color: {color}; font-weight: 700; font-size: 1.25rem; font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;">{time_formatted}</div>
                    <div style="color: #6b7280; font-size: 0.85rem; margin-top: 2px;">Total execution time</div>
                </div>
                """
            )

        return "".join(execution_times_html)

    def calculate_best_worst_tests(
        self, test_case_results: List[TestCaseResult]
    ) -> Tuple[str, str]:
        """Calculate best and worst performing tests.

        Only test cases with an average score above zero take part.
        NOTE(review): presumably zero marks "no data"; a test that genuinely
        scored 0 is therefore never reported as the worst. Confirm.

        Returns:
            ``(best, worst)`` strings of the form ``"<id> (avg: 0.90)"``, or
            ``"Not available"`` for both when no test qualifies.
        """
        test_averages = {
            result.test_case_id: result.average_score
            for result in test_case_results
            if result.average_score > 0
        }

        if not test_averages:
            return "Not available", "Not available"

        best_test = max(test_averages, key=test_averages.get)
        worst_test = min(test_averages, key=test_averages.get)

        return (
            f"{best_test} (avg: {test_averages[best_test]:.2f})",
            f"{worst_test} (avg: {test_averages[worst_test]:.2f})",
        )

    def calculate_average_time(
        self, model_performances: Dict[str, ModelPerformance]
    ) -> str:
        """Calculate overall average time.

        NOTE(review): ``ModelPerformance`` as defined in this file has no
        ``durations`` attribute, so with those objects this always returns
        "Not available" until durations are recorded on them.

        Returns:
            The mean of all collected durations formatted like ``"1.5s"``,
            or ``"Not available"`` when no durations are present.
        """
        all_durations = []

        for performance in model_performances.values():
            durations = getattr(performance, "durations", None)
            if durations:
                all_durations.extend(durations)

        if not all_durations:
            return "Not available"

        return f"{sum(all_durations) / len(all_durations):.1f}s"

    def _get_score_class(self, score: float) -> str:
        """Get CSS class based on score value.

        Thresholds: >= 0.7 is high, >= 0.4 is medium, anything else is low.
        """
        if score >= 0.7:
            return "score-high"
        if score >= 0.4:
            return "score-medium"
        return "score-low"
697
+
698
+
699
class ModelResultsProcessor:
    """Reshapes per-model results into per-test-case structures."""

    def __init__(self, file_service: FileService):
        self.file_service = file_service

    def organize_test_cases(
        self, model_results: Dict[str, Any]
    ) -> Dict[str, Dict[str, Any]]:
        """Pivot the results from model-major to test-case-major order.

        Returns:
            ``{test_case_id: {model_name: test case record}}``. Records
            without a test case id are dropped.
        """
        by_test_case: Dict[str, Dict[str, Any]] = {}

        for model_name, results in model_results.items():
            if "test_cases" not in results:
                continue
            for record in results["test_cases"]:
                case_id = record.get("test_case_id")
                if not case_id:
                    continue
                by_test_case.setdefault(case_id, {})[model_name] = record

        return by_test_case

    def create_test_case_results(
        self, test_cases: Dict[str, Dict[str, Any]]
    ) -> List[TestCaseResult]:
        """Build one ``TestCaseResult`` per organized test case.

        Category and description are read from the test case definition
        loaded via the ``test_case_path`` of the first run of the first
        model. When that path cannot be located in the data, the category
        falls back to "Uncategorized" and a generic description is used.

        Args:
            test_cases: Output of ``organize_test_cases``.

        Returns:
            Results in the iteration order of ``test_cases``.
        """
        built = []

        for test_case_id, models_data in test_cases.items():
            default_description = f"Test case: {test_case_id}"

            # Get test case metadata
            try:
                # Any model's first run is used to locate the definition.
                sample_model = next(iter(models_data.values()))
                definition = load_test_case(sample_model["runs"][0]["test_case_path"])
                category = definition.get("category", "Uncategorized")
                description = definition.get("description", default_description)
            except (StopIteration, IndexError, KeyError):
                category = "Uncategorized"
                description = default_description

            # Average over every run of every model.
            collected = []
            for model_data in models_data.values():
                if not (isinstance(model_data, dict) and "runs" in model_data):
                    continue
                for run in model_data["runs"]:
                    if not isinstance(run, dict):
                        continue
                    value = run.get("llm_eval", {}).get("score")
                    if value is None:
                        value = run.get("response_match", 0)
                    # isinstance() also rejects a remaining None.
                    if isinstance(value, (int, float)):
                        collected.append(value)

            mean_score = sum(collected) / len(collected) if collected else 0

            built.append(
                TestCaseResult(
                    test_case_id=test_case_id,
                    category=category,
                    description=description,
                    model_results={test_case_id: models_data},
                    average_score=mean_score,
                )
            )

        return built
768
+
769
+
770
class ReportDataProcessor:
    """Main processor that coordinates the entire data processing pipeline.

    Wires together the file, extraction, metrics, chart, template and
    model-results services and exposes two entry points:

    * ``get_evaluation_data`` - summary values for the report header.
    * ``get_detailed_evaluation_data`` - chart, table and breakdown values.
    """

    def __init__(self):
        """Create the collaborating services used by the pipeline."""
        # One FileService instance is shared by every service that takes one.
        self.file_service = FileService()
        self.extraction_service = ResultsExtractionService(self.file_service)
        self.metrics_service = MetricsCalculationService()
        self.chart_service = ChartDataService(self.file_service)
        self.template_service = TemplateDataService(self.file_service)
        self.processor = ModelResultsProcessor(self.file_service)

    def get_evaluation_data(self, results_dir: Path) -> Dict[str, Any]:
        """Extract and process basic evaluation data.

        Args:
            results_dir: Directory handed to the extraction service to locate
                model results and execution statistics.

        Returns:
            The dictionary produced by ``_metrics_to_dict``. When no model
            results are found, it is built from a freshly constructed
            ``EvaluationMetrics`` instance.
        """
        logger.info("Processing evaluation data...")

        # Initialize metrics
        metrics = EvaluationMetrics()

        # Extract model results
        model_results = self.extraction_service.extract_model_results(results_dir)
        if not model_results:
            logger.warning("No model results found")
            return self._metrics_to_dict(metrics)

        # Set basic model information
        metrics.models = list(model_results.keys())

        # Calculate test statistics
        total_tests, runs_description = self.metrics_service.calculate_run_statistics(
            model_results
        )
        metrics.total_tests = total_tests
        metrics.runs = runs_description
        metrics.test_case_names = self._extract_test_case_names(model_results)

        # Extract execution statistics; timing fields are only filled in when
        # the stats payload actually carries a total execution time.
        stats_data = self.extraction_service.extract_execution_stats(results_dir)
        if stats_data and "total_execution_time" in stats_data:
            total_time = stats_data["total_execution_time"]
            metrics.total_execution_time = total_time
            formatted_time, duration = self.metrics_service.format_execution_time(
                total_time
            )
            metrics.total_execution_time_formatted = formatted_time
            metrics.duration = duration

        logger.info(f"Processed evaluation data for {len(metrics.models)} models")
        return self._metrics_to_dict(metrics)

    def get_detailed_evaluation_data(self, results_dir: Path) -> Dict[str, Any]:
        """Extract and process detailed evaluation data for charts and breakdowns.

        Args:
            results_dir: Directory handed to the extraction service to locate
                model results.

        Returns:
            A dictionary with the keys ``performance_metrics_rows``,
            ``breakdown_content``, ``best_test``, ``worst_test``, ``avg_time``,
            ``total_evaluations``, ``categories_data``, ``chart_datasets_data``
            and ``model_execution_times``. The two chart entries are JSON
            strings. When no model results are found, the placeholder
            structure from ``_empty_detailed_data`` is returned instead.
        """
        logger.info("Processing detailed evaluation data...")

        # Extract model results
        model_results = self.extraction_service.extract_model_results(results_dir)
        if not model_results:
            logger.warning("No model results found for detailed data")
            return self._empty_detailed_data()

        # Calculate model performances; total_evaluations accumulates the
        # test_count reported for each model.
        model_performances = {}
        total_evaluations = 0

        for model_name, results_data in model_results.items():
            performance = self.metrics_service.calculate_model_performance(
                model_name, results_data
            )
            model_performances[model_name] = performance
            total_evaluations += performance.test_count

        # Organize test cases
        test_cases = self.processor.organize_test_cases(model_results)
        test_case_results = self.processor.create_test_case_results(test_cases)

        # Generate chart configuration
        chart_config = self.chart_service.generate_chart_configuration(
            model_results, test_cases
        )

        # Generate template data
        performance_metrics_rows = (
            self.template_service.generate_performance_metrics_table(model_performances)
        )
        breakdown_content = self.template_service.generate_breakdown_content(
            test_case_results, model_performances, model_results
        )
        model_execution_times = self.template_service.generate_model_execution_times(
            model_results
        )

        # Calculate best/worst tests and average time
        best_test, worst_test = self.template_service.calculate_best_worst_tests(
            test_case_results
        )
        avg_time = self.template_service.calculate_average_time(model_performances)

        detailed_data = {
            "performance_metrics_rows": performance_metrics_rows,
            "breakdown_content": breakdown_content,
            "best_test": best_test,
            "worst_test": worst_test,
            "avg_time": avg_time,
            "total_evaluations": total_evaluations,
            # Chart inputs are serialized to JSON strings before being returned.
            "categories_data": json.dumps(chart_config.categories),
            "chart_datasets_data": json.dumps(chart_config.datasets),
            "model_execution_times": model_execution_times,
        }

        logger.info("Processed detailed evaluation data successfully")
        return detailed_data

    def _extract_test_case_names(self, model_results: Dict[str, Any]) -> List[str]:
        """Extract unique test case names from model results.

        Args:
            model_results: Mapping whose values may contain a ``"test_cases"``
                list of dictionaries, each optionally carrying a
                ``"test_case_id"``.

        Returns:
            The distinct, truthy ``test_case_id`` values in sorted order.
        """
        test_case_names = set()

        for results in model_results.values():
            if "test_cases" in results:
                for test_case in results["test_cases"]:
                    test_case_id = test_case.get("test_case_id")
                    # Missing or empty ids are ignored.
                    if test_case_id:
                        test_case_names.add(test_case_id)

        # sorted() accepts any iterable, so no intermediate list is needed.
        return sorted(test_case_names)

    def _metrics_to_dict(self, metrics: EvaluationMetrics) -> Dict[str, Any]:
        """Convert EvaluationMetrics to dictionary.

        Besides copying the metric fields, this builds three template-specific
        values: ``total_models`` (the model count as a string), ``model_tags``
        (one ``<span class="model-tag">`` per model) and ``test_cases_list``
        (one ``<li>`` per test case name).

        Model and test case names are HTML-escaped inside the generated
        fragments so that characters such as ``<`` or ``&`` cannot break or
        inject markup. The raw ``models`` and ``test_case_names`` values are
        returned unescaped.

        Args:
            metrics: The metrics object to flatten.

        Returns:
            A dictionary of metric fields plus the template-specific keys.
        """
        # Imported locally so this method does not depend on a module-level
        # import being present.
        from html import escape

        # Generate model tags HTML
        model_tags = ""
        if metrics.models:
            model_tags = "".join(
                f'<span class="model-tag">{escape(str(model))}</span>'
                for model in metrics.models
            )

        # Generate test cases list HTML
        test_cases_list = ""
        if metrics.test_case_names:
            test_cases_list = "".join(
                f"<li>{escape(str(test_case))}.test.json</li>"
                for test_case in metrics.test_case_names
            )
        elif metrics.total_tests > 0:
            # Tests were counted but their names could not be collected.
            test_cases_list = "<li>Test cases available (names not loaded)</li>"

        return {
            "models": metrics.models,
            "total_execution_time": metrics.total_execution_time,
            "total_execution_time_formatted": metrics.total_execution_time_formatted,
            "generation_time": metrics.generation_time,
            "timestamp": metrics.timestamp,
            "runs": metrics.runs,
            "total_tests": metrics.total_tests,
            "duration": metrics.duration,
            "test_case_names": metrics.test_case_names,
            # Template-specific keys
            "total_models": str(len(metrics.models)),
            "model_tags": model_tags,
            "test_cases_list": test_cases_list,
        }

    def _empty_detailed_data(self) -> Dict[str, Any]:
        """Return empty detailed data structure.

        The keys mirror those returned by ``get_detailed_evaluation_data``,
        filled with placeholder values. The two chart entries are the JSON
        text of an empty list.
        """
        return {
            "performance_metrics_rows": "",
            "breakdown_content": "",
            "best_test": "Not available",
            "worst_test": "Not available",
            "avg_time": "Not available",
            "total_evaluations": 0,
            "categories_data": "[]",
            "chart_datasets_data": "[]",
            "model_execution_times": "",
        }
944
+
945
+
946
def main():
    """Command-line smoke test for the report data processor.

    Uses the first command-line argument as the results directory when one
    is supplied; otherwise falls back to ``results/tests`` next to this
    script. Runs both the basic and the detailed extraction and prints a
    short summary of each.
    """
    import sys
    from pathlib import Path

    # An explicit directory on the command line wins over the default
    # test results directory that sits beside this script.
    cli_args = sys.argv[1:]
    results_dir = (
        Path(cli_args[0])
        if cli_args
        else Path(__file__).parent / "results" / "tests"
    )

    report_processor = ReportDataProcessor()

    print("Testing evaluation data extraction...")
    summary = report_processor.get_evaluation_data(results_dir)
    model_count = len(summary.get("models", []))
    print(f"Found {model_count} models")

    print("Testing detailed evaluation data extraction...")
    detailed = report_processor.get_detailed_evaluation_data(results_dir)
    evaluation_total = detailed.get("total_evaluations", 0)
    print(f"Total evaluations: {evaluation_total}")

    print("Report data processing completed successfully!")
969
+
970
+
971
# Run the smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()