solace-agent-mesh 1.5.1__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of solace-agent-mesh might be problematic. Click here for more details.

Files changed (180)
  1. solace_agent_mesh/agent/adk/callbacks.py +0 -5
  2. solace_agent_mesh/agent/adk/models/lite_llm.py +123 -8
  3. solace_agent_mesh/agent/adk/models/oauth2_token_manager.py +245 -0
  4. solace_agent_mesh/agent/protocol/event_handlers.py +40 -1
  5. solace_agent_mesh/agent/proxies/__init__.py +0 -0
  6. solace_agent_mesh/agent/proxies/a2a/__init__.py +3 -0
  7. solace_agent_mesh/agent/proxies/a2a/app.py +55 -0
  8. solace_agent_mesh/agent/proxies/a2a/component.py +1115 -0
  9. solace_agent_mesh/agent/proxies/a2a/config.py +140 -0
  10. solace_agent_mesh/agent/proxies/a2a/oauth_token_cache.py +104 -0
  11. solace_agent_mesh/agent/proxies/base/__init__.py +3 -0
  12. solace_agent_mesh/agent/proxies/base/app.py +99 -0
  13. solace_agent_mesh/agent/proxies/base/component.py +619 -0
  14. solace_agent_mesh/agent/proxies/base/config.py +85 -0
  15. solace_agent_mesh/agent/proxies/base/proxy_task_context.py +17 -0
  16. solace_agent_mesh/agent/sac/app.py +9 -3
  17. solace_agent_mesh/agent/sac/component.py +160 -8
  18. solace_agent_mesh/agent/tools/audio_tools.py +125 -8
  19. solace_agent_mesh/agent/tools/web_tools.py +10 -5
  20. solace_agent_mesh/agent/utils/artifact_helpers.py +141 -3
  21. solace_agent_mesh/assets/docs/404.html +3 -3
  22. solace_agent_mesh/assets/docs/assets/js/5c2bd65f.eda4bcb2.js +1 -0
  23. solace_agent_mesh/assets/docs/assets/js/6ad8f0bd.f4b15f3b.js +1 -0
  24. solace_agent_mesh/assets/docs/assets/js/71da7b71.38583438.js +1 -0
  25. solace_agent_mesh/assets/docs/assets/js/77cf947d.48cb18a2.js +1 -0
  26. solace_agent_mesh/assets/docs/assets/js/924ffdeb.8095e148.js +1 -0
  27. solace_agent_mesh/assets/docs/assets/js/9e9d0a82.570c057b.js +1 -0
  28. solace_agent_mesh/assets/docs/assets/js/{ad71b5ed.60668e9e.js → ad71b5ed.af3ecfd1.js} +1 -1
  29. solace_agent_mesh/assets/docs/assets/js/ceb2a7a6.5d92d7d0.js +1 -0
  30. solace_agent_mesh/assets/docs/assets/js/{da0b5bad.9d369087.js → da0b5bad.d08a9466.js} +1 -1
  31. solace_agent_mesh/assets/docs/assets/js/db924877.e98d12a1.js +1 -0
  32. solace_agent_mesh/assets/docs/assets/js/de915948.27d6b065.js +1 -0
  33. solace_agent_mesh/assets/docs/assets/js/e6f9706b.e74a984d.js +1 -0
  34. solace_agent_mesh/assets/docs/assets/js/f284c35a.42f59cdd.js +1 -0
  35. solace_agent_mesh/assets/docs/assets/js/ff4d71f2.15b02f97.js +1 -0
  36. solace_agent_mesh/assets/docs/assets/js/{main.bd3c34f3.js → main.20feee82.js} +2 -2
  37. solace_agent_mesh/assets/docs/assets/js/runtime~main.0d198646.js +1 -0
  38. solace_agent_mesh/assets/docs/docs/documentation/components/agents/index.html +15 -4
  39. solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/artifact-management/index.html +4 -4
  40. solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/audio-tools/index.html +4 -4
  41. solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/data-analysis-tools/index.html +4 -4
  42. solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/embeds/index.html +4 -4
  43. solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/index.html +4 -4
  44. solace_agent_mesh/assets/docs/docs/documentation/components/cli/index.html +4 -4
  45. solace_agent_mesh/assets/docs/docs/documentation/components/gateways/index.html +4 -4
  46. solace_agent_mesh/assets/docs/docs/documentation/components/index.html +4 -4
  47. solace_agent_mesh/assets/docs/docs/documentation/components/orchestrator/index.html +4 -4
  48. solace_agent_mesh/assets/docs/docs/documentation/components/plugins/index.html +4 -4
  49. solace_agent_mesh/assets/docs/docs/documentation/components/proxies/index.html +262 -0
  50. solace_agent_mesh/assets/docs/docs/documentation/deploying/debugging/index.html +3 -3
  51. solace_agent_mesh/assets/docs/docs/documentation/deploying/deployment-options/index.html +31 -3
  52. solace_agent_mesh/assets/docs/docs/documentation/deploying/index.html +3 -3
  53. solace_agent_mesh/assets/docs/docs/documentation/deploying/observability/index.html +3 -3
  54. solace_agent_mesh/assets/docs/docs/documentation/developing/create-agents/index.html +4 -4
  55. solace_agent_mesh/assets/docs/docs/documentation/developing/create-gateways/index.html +5 -5
  56. solace_agent_mesh/assets/docs/docs/documentation/developing/creating-python-tools/index.html +4 -4
  57. solace_agent_mesh/assets/docs/docs/documentation/developing/creating-service-providers/index.html +4 -4
  58. solace_agent_mesh/assets/docs/docs/documentation/developing/evaluations/index.html +135 -0
  59. solace_agent_mesh/assets/docs/docs/documentation/developing/index.html +6 -4
  60. solace_agent_mesh/assets/docs/docs/documentation/developing/structure/index.html +4 -4
  61. solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/bedrock-agents/index.html +4 -4
  62. solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/custom-agent/index.html +4 -4
  63. solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/event-mesh-gateway/index.html +5 -5
  64. solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/mcp-integration/index.html +4 -4
  65. solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/mongodb-integration/index.html +4 -4
  66. solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/rag-integration/index.html +4 -4
  67. solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/rest-gateway/index.html +4 -4
  68. solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/slack-integration/index.html +4 -4
  69. solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/sql-database/index.html +4 -4
  70. solace_agent_mesh/assets/docs/docs/documentation/enterprise/index.html +3 -3
  71. solace_agent_mesh/assets/docs/docs/documentation/enterprise/installation/index.html +3 -3
  72. solace_agent_mesh/assets/docs/docs/documentation/enterprise/rbac-setup-guide/index.html +3 -3
  73. solace_agent_mesh/assets/docs/docs/documentation/enterprise/single-sign-on/index.html +3 -3
  74. solace_agent_mesh/assets/docs/docs/documentation/getting-started/architecture/index.html +3 -3
  75. solace_agent_mesh/assets/docs/docs/documentation/getting-started/index.html +3 -3
  76. solace_agent_mesh/assets/docs/docs/documentation/getting-started/introduction/index.html +3 -3
  77. solace_agent_mesh/assets/docs/docs/documentation/getting-started/try-agent-mesh/index.html +3 -3
  78. solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/configurations/index.html +6 -5
  79. solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/index.html +3 -3
  80. solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/installation/index.html +3 -3
  81. solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/large_language_models/index.html +100 -3
  82. solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/run-project/index.html +3 -3
  83. solace_agent_mesh/assets/docs/docs/documentation/migrations/a2a-upgrade/a2a-gateway-upgrade-to-0.3.0/index.html +3 -3
  84. solace_agent_mesh/assets/docs/docs/documentation/migrations/a2a-upgrade/a2a-technical-migration-map/index.html +3 -3
  85. solace_agent_mesh/assets/docs/lunr-index-1761165361160.json +1 -0
  86. solace_agent_mesh/assets/docs/lunr-index.json +1 -1
  87. solace_agent_mesh/assets/docs/search-doc-1761165361160.json +1 -0
  88. solace_agent_mesh/assets/docs/search-doc.json +1 -1
  89. solace_agent_mesh/assets/docs/sitemap.xml +1 -1
  90. solace_agent_mesh/cli/__init__.py +1 -1
  91. solace_agent_mesh/cli/commands/add_cmd/agent_cmd.py +2 -69
  92. solace_agent_mesh/cli/commands/eval_cmd.py +11 -49
  93. solace_agent_mesh/cli/commands/init_cmd/__init__.py +0 -5
  94. solace_agent_mesh/cli/commands/init_cmd/env_step.py +10 -12
  95. solace_agent_mesh/cli/commands/init_cmd/orchestrator_step.py +9 -61
  96. solace_agent_mesh/cli/commands/init_cmd/webui_gateway_step.py +9 -49
  97. solace_agent_mesh/cli/commands/plugin_cmd/add_cmd.py +1 -2
  98. solace_agent_mesh/client/webui/frontend/static/assets/{authCallback-DwrxZE0E.js → authCallback-BTf6dqwp.js} +1 -1
  99. solace_agent_mesh/client/webui/frontend/static/assets/{client-DarGQzyw.js → client-CaY59VuC.js} +1 -1
  100. solace_agent_mesh/client/webui/frontend/static/assets/main-BGTaW0uv.js +342 -0
  101. solace_agent_mesh/client/webui/frontend/static/assets/main-DHJKSW1S.css +1 -0
  102. solace_agent_mesh/client/webui/frontend/static/assets/{vendor-BKIeiHj_.js → vendor-BEmvJSYz.js} +1 -1
  103. solace_agent_mesh/client/webui/frontend/static/auth-callback.html +3 -3
  104. solace_agent_mesh/client/webui/frontend/static/index.html +4 -4
  105. solace_agent_mesh/common/a2a/__init__.py +24 -0
  106. solace_agent_mesh/common/a2a/artifact.py +39 -0
  107. solace_agent_mesh/common/a2a/events.py +29 -0
  108. solace_agent_mesh/common/a2a/message.py +68 -0
  109. solace_agent_mesh/common/a2a/protocol.py +73 -1
  110. solace_agent_mesh/common/agent_registry.py +83 -3
  111. solace_agent_mesh/common/constants.py +3 -1
  112. solace_agent_mesh/common/utils/pydantic_utils.py +12 -0
  113. solace_agent_mesh/config_portal/backend/common.py +1 -1
  114. solace_agent_mesh/config_portal/frontend/static/client/assets/_index-ByU1X1HD.js +98 -0
  115. solace_agent_mesh/config_portal/frontend/static/client/assets/{manifest-44d62be6.js → manifest-61038fc6.js} +1 -1
  116. solace_agent_mesh/config_portal/frontend/static/client/index.html +1 -1
  117. solace_agent_mesh/evaluation/evaluator.py +128 -104
  118. solace_agent_mesh/evaluation/message_organizer.py +116 -110
  119. solace_agent_mesh/evaluation/report_data_processor.py +84 -86
  120. solace_agent_mesh/evaluation/report_generator.py +73 -79
  121. solace_agent_mesh/evaluation/run.py +421 -235
  122. solace_agent_mesh/evaluation/shared/__init__.py +92 -0
  123. solace_agent_mesh/evaluation/shared/constants.py +47 -0
  124. solace_agent_mesh/evaluation/shared/exceptions.py +50 -0
  125. solace_agent_mesh/evaluation/shared/helpers.py +35 -0
  126. solace_agent_mesh/evaluation/shared/test_case_loader.py +167 -0
  127. solace_agent_mesh/evaluation/shared/test_suite_loader.py +280 -0
  128. solace_agent_mesh/evaluation/subscriber.py +111 -232
  129. solace_agent_mesh/evaluation/summary_builder.py +227 -117
  130. solace_agent_mesh/gateway/base/app.py +1 -1
  131. solace_agent_mesh/gateway/base/component.py +8 -1
  132. solace_agent_mesh/gateway/http_sse/alembic/versions/20251015_add_session_performance_indexes.py +70 -0
  133. solace_agent_mesh/gateway/http_sse/component.py +98 -2
  134. solace_agent_mesh/gateway/http_sse/dependencies.py +4 -4
  135. solace_agent_mesh/gateway/http_sse/main.py +2 -1
  136. solace_agent_mesh/gateway/http_sse/repository/chat_task_repository.py +12 -13
  137. solace_agent_mesh/gateway/http_sse/repository/feedback_repository.py +15 -18
  138. solace_agent_mesh/gateway/http_sse/repository/interfaces.py +25 -18
  139. solace_agent_mesh/gateway/http_sse/repository/session_repository.py +30 -26
  140. solace_agent_mesh/gateway/http_sse/repository/task_repository.py +35 -44
  141. solace_agent_mesh/gateway/http_sse/routers/agent_cards.py +4 -3
  142. solace_agent_mesh/gateway/http_sse/routers/artifacts.py +95 -203
  143. solace_agent_mesh/gateway/http_sse/routers/dto/responses/session_responses.py +4 -3
  144. solace_agent_mesh/gateway/http_sse/routers/sessions.py +2 -2
  145. solace_agent_mesh/gateway/http_sse/routers/tasks.py +33 -41
  146. solace_agent_mesh/gateway/http_sse/routers/visualization.py +17 -11
  147. solace_agent_mesh/gateway/http_sse/services/data_retention_service.py +4 -4
  148. solace_agent_mesh/gateway/http_sse/services/feedback_service.py +51 -43
  149. solace_agent_mesh/gateway/http_sse/services/session_service.py +20 -20
  150. solace_agent_mesh/gateway/http_sse/services/task_logger_service.py +8 -8
  151. solace_agent_mesh/gateway/http_sse/shared/base_repository.py +45 -71
  152. solace_agent_mesh/gateway/http_sse/shared/types.py +0 -18
  153. solace_agent_mesh/templates/gateway_config_template.yaml +0 -5
  154. solace_agent_mesh/templates/logging_config_template.ini +10 -6
  155. solace_agent_mesh/templates/plugin_gateway_config_template.yaml +0 -3
  156. solace_agent_mesh/templates/shared_config.yaml +40 -0
  157. {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.0.dist-info}/METADATA +47 -21
  158. {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.0.dist-info}/RECORD +162 -141
  159. solace_agent_mesh/assets/docs/assets/js/5c2bd65f.e49689dd.js +0 -1
  160. solace_agent_mesh/assets/docs/assets/js/6ad8f0bd.39d5851d.js +0 -1
  161. solace_agent_mesh/assets/docs/assets/js/71da7b71.804d6567.js +0 -1
  162. solace_agent_mesh/assets/docs/assets/js/77cf947d.64c9bd6c.js +0 -1
  163. solace_agent_mesh/assets/docs/assets/js/9e9d0a82.dd810042.js +0 -1
  164. solace_agent_mesh/assets/docs/assets/js/db924877.cbc66f02.js +0 -1
  165. solace_agent_mesh/assets/docs/assets/js/de915948.139b4b9c.js +0 -1
  166. solace_agent_mesh/assets/docs/assets/js/e6f9706b.582a78ca.js +0 -1
  167. solace_agent_mesh/assets/docs/assets/js/f284c35a.5766a13d.js +0 -1
  168. solace_agent_mesh/assets/docs/assets/js/ff4d71f2.9c0297a6.js +0 -1
  169. solace_agent_mesh/assets/docs/assets/js/runtime~main.18dc45dd.js +0 -1
  170. solace_agent_mesh/assets/docs/lunr-index-1760121512891.json +0 -1
  171. solace_agent_mesh/assets/docs/search-doc-1760121512891.json +0 -1
  172. solace_agent_mesh/client/webui/frontend/static/assets/main-2nd1gbaH.js +0 -339
  173. solace_agent_mesh/client/webui/frontend/static/assets/main-DoKXctCM.css +0 -1
  174. solace_agent_mesh/config_portal/frontend/static/client/assets/_index-BNuqpWDc.js +0 -98
  175. solace_agent_mesh/evaluation/config_loader.py +0 -657
  176. solace_agent_mesh/evaluation/test_case_loader.py +0 -714
  177. /solace_agent_mesh/assets/docs/assets/js/{main.bd3c34f3.js.LICENSE.txt → main.20feee82.js.LICENSE.txt} +0 -0
  178. {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.0.dist-info}/WHEEL +0 -0
  179. {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.0.dist-info}/entry_points.txt +0 -0
  180. {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -4,14 +4,18 @@ This module processes test run messages and generates comprehensive summaries.
4
4
  """
5
5
 
6
6
  import json
7
- import os
7
+ import logging
8
8
  import re
9
+ from dataclasses import dataclass, field
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+
13
+ import requests
9
14
  import yaml
10
15
 
11
- from datetime import datetime
12
- from typing import Dict, List, Optional, Any, Tuple, Set
13
- from dataclasses import dataclass, field
14
- from .test_case_loader import load_test_case
16
+ from .shared import TestSuiteConfiguration, load_test_case
17
+
18
+ log = logging.getLogger(__name__)
15
19
 
16
20
 
17
21
  @dataclass
@@ -21,7 +25,7 @@ class ToolCall:
21
25
  call_id: str
22
26
  agent: str
23
27
  tool_name: str
24
- arguments: Dict[str, Any]
28
+ arguments: dict[str, any]
25
29
  timestamp: str
26
30
 
27
31
 
@@ -31,21 +35,21 @@ class ArtifactInfo:
31
35
 
32
36
  artifact_name: str
33
37
  directory: str
34
- versions: List[Dict[str, Any]]
35
- artifact_type: Optional[str] = None
36
- source_path: Optional[str] = None
37
- created_by_tool: Optional[str] = None
38
- created_by_call_id: Optional[str] = None
39
- creation_timestamp: Optional[str] = None
38
+ versions: list[dict[str, any]]
39
+ artifact_type: str | None = None
40
+ source_path: str | None = None
41
+ created_by_tool: str | None = None
42
+ created_by_call_id: str | None = None
43
+ creation_timestamp: str | None = None
40
44
 
41
45
 
42
46
  @dataclass
43
47
  class TimeMetrics:
44
48
  """Time-related metrics for a test run."""
45
49
 
46
- start_time: Optional[str] = None
47
- end_time: Optional[str] = None
48
- duration_seconds: Optional[float] = None
50
+ start_time: str | None = None
51
+ end_time: str | None = None
52
+ duration_seconds: float | None = None
49
53
 
50
54
 
51
55
  @dataclass
@@ -61,12 +65,12 @@ class RunSummary:
61
65
  final_status: str = ""
62
66
  final_message: str = ""
63
67
  time_metrics: TimeMetrics = field(default_factory=TimeMetrics)
64
- tool_calls: List[ToolCall] = field(default_factory=list)
65
- input_artifacts: List[ArtifactInfo] = field(default_factory=list)
66
- output_artifacts: List[ArtifactInfo] = field(default_factory=list)
67
- errors: List[str] = field(default_factory=list)
68
+ tool_calls: list[ToolCall] = field(default_factory=list)
69
+ input_artifacts: list[ArtifactInfo] = field(default_factory=list)
70
+ output_artifacts: list[ArtifactInfo] = field(default_factory=list)
71
+ errors: list[str] = field(default_factory=list)
68
72
 
69
- def to_dict(self) -> Dict[str, Any]:
73
+ def to_dict(self) -> dict[str, any]:
70
74
  """Convert summary to dictionary format for JSON serialization."""
71
75
  return {
72
76
  "test_case_id": self.test_case_id,
@@ -115,16 +119,16 @@ class RunSummary:
115
119
  class ConfigService:
116
120
  """Handles configuration loading and YAML processing."""
117
121
 
118
- _config_cache: Dict[str, Any] = {}
122
+ _config_cache: dict[str, any] = {}
119
123
 
120
124
  @classmethod
121
- def load_yaml_with_includes(cls, file_path: str) -> Dict[str, Any]:
125
+ def load_yaml_with_includes(cls, file_path: str) -> dict[str, any]:
122
126
  """Load YAML file with !include directive processing and caching."""
123
127
  if file_path in cls._config_cache:
124
128
  return cls._config_cache[file_path]
125
129
 
126
130
  try:
127
- with open(file_path, "r") as f:
131
+ with open(file_path) as f:
128
132
  content = f.read()
129
133
 
130
134
  content = cls._process_includes(content, file_path)
@@ -133,17 +137,18 @@ class ConfigService:
133
137
  return config
134
138
 
135
139
  except (FileNotFoundError, yaml.YAMLError) as e:
136
- raise ValueError(f"Failed to load YAML config from {file_path}: {e}")
140
+ raise ValueError(f"Failed to load YAML config from {file_path}: {e}") from e
137
141
 
138
142
  @staticmethod
139
143
  def _process_includes(content: str, base_file_path: str) -> str:
140
144
  """Process !include directives in YAML content."""
141
145
  include_pattern = re.compile(r"^\s*!include\s+(.*)$", re.MULTILINE)
146
+ base_dir = Path(base_file_path).parent
142
147
 
143
148
  def replacer(match):
144
- include_path = match.group(1).strip()
145
- include_path = os.path.join(os.path.dirname(base_file_path), include_path)
146
- with open(include_path, "r") as inc_f:
149
+ include_path_str = match.group(1).strip()
150
+ include_path = base_dir / include_path_str
151
+ with include_path.open() as inc_f:
147
152
  return inc_f.read()
148
153
 
149
154
  # Repeatedly replace includes until none are left
@@ -153,7 +158,7 @@ class ConfigService:
153
158
  return content
154
159
 
155
160
  @classmethod
156
- def get_artifact_config(cls) -> Tuple[str, str]:
161
+ def get_local_artifact_config(cls) -> tuple[str, str]:
157
162
  """Get artifact service configuration from eval backend config."""
158
163
  try:
159
164
  webui_config = cls.load_yaml_with_includes("configs/eval_backend.yaml")
@@ -171,36 +176,37 @@ class ConfigService:
171
176
  raise ValueError("Could not find 'a2a_eval_backend_app' config")
172
177
 
173
178
  except Exception as e:
174
- raise ValueError(f"Failed to load artifact configuration: {e}")
179
+ raise ValueError(f"Failed to load artifact configuration: {e}") from e
175
180
 
176
181
 
177
182
  class FileService:
178
183
  """Handles file operations and path management."""
179
184
 
180
185
  @staticmethod
181
- def load_json(filepath: str) -> Any:
186
+ def load_json(filepath: Path) -> any:
182
187
  """Load JSON data from file."""
183
188
  try:
184
- with open(filepath, "r") as f:
189
+ with filepath.open() as f:
185
190
  return json.load(f)
186
191
  except (FileNotFoundError, json.JSONDecodeError) as e:
187
- raise ValueError(f"Failed to load JSON from {filepath}: {e}")
192
+ raise ValueError(f"Failed to load JSON from {filepath}: {e}") from e
188
193
 
189
194
  @staticmethod
190
- def save_json(data: Any, filepath: str):
195
+ def save_json(data: any, filepath: Path):
191
196
  """Save data as JSON to file."""
192
197
  try:
193
- with open(filepath, "w") as f:
198
+ filepath.parent.mkdir(parents=True, exist_ok=True)
199
+ with filepath.open("w") as f:
194
200
  json.dump(data, f, indent=2)
195
201
  except Exception as e:
196
- raise ValueError(f"Failed to save JSON to {filepath}: {e}")
202
+ raise ValueError(f"Failed to save JSON to {filepath}: {e}") from e
197
203
 
198
204
 
199
205
  class TestCaseService:
200
206
  """Handles test case loading and validation."""
201
207
 
202
208
  @staticmethod
203
- def load_test_case(test_case_id: str) -> Optional[Dict[str, Any]]:
209
+ def load_test_case(test_case_id: str) -> dict[str, any] | None:
204
210
  """Load test case definition with error handling."""
205
211
  try:
206
212
  return load_test_case(test_case_id)
@@ -208,7 +214,7 @@ class TestCaseService:
208
214
  return None
209
215
 
210
216
  @staticmethod
211
- def extract_input_artifact_names(test_case: Dict[str, Any]) -> Set[str]:
217
+ def extract_input_artifact_names(test_case: dict[str, any]) -> set[str]:
212
218
  """Extract input artifact names from test case definition."""
213
219
  input_artifact_names = set()
214
220
  test_case_artifacts = test_case.get("artifacts", [])
@@ -216,7 +222,7 @@ class TestCaseService:
216
222
  for tc_artifact in test_case_artifacts:
217
223
  if tc_artifact.get("type") == "file" and "path" in tc_artifact:
218
224
  # Extract filename from path (e.g., "artifacts/sample.csv" -> "sample.csv")
219
- artifact_name = os.path.basename(tc_artifact["path"])
225
+ artifact_name = Path(tc_artifact["path"]).name
220
226
  input_artifact_names.add(artifact_name)
221
227
 
222
228
  return input_artifact_names
@@ -226,7 +232,7 @@ class TimeProcessor:
226
232
  """Handles timestamp parsing and duration calculations."""
227
233
 
228
234
  @staticmethod
229
- def extract_start_time(first_message: Dict[str, Any]) -> Optional[str]:
235
+ def extract_start_time(first_message: dict[str, any]) -> str | None:
230
236
  """Extract start time from the first message."""
231
237
  try:
232
238
  payload = first_message.get("payload", {})
@@ -250,7 +256,7 @@ class TimeProcessor:
250
256
  return None
251
257
 
252
258
  @staticmethod
253
- def extract_end_time(last_message: Dict[str, Any]) -> Optional[str]:
259
+ def extract_end_time(last_message: dict[str, any]) -> str | None:
254
260
  """Extract end time from the last message."""
255
261
  try:
256
262
  payload = last_message.get("payload", {})
@@ -263,7 +269,7 @@ class TimeProcessor:
263
269
  @staticmethod
264
270
  def calculate_duration(
265
271
  start_time_str: str, end_time_str: str
266
- ) -> Tuple[Optional[float], Optional[str]]:
272
+ ) -> tuple[float | None, str | None]:
267
273
  """Calculate duration and return normalized start time."""
268
274
  try:
269
275
  start_time = datetime.fromisoformat(start_time_str)
@@ -293,8 +299,8 @@ class MessageProcessor:
293
299
 
294
300
  @staticmethod
295
301
  def extract_namespace_and_agent(
296
- first_message: Dict[str, Any],
297
- ) -> Tuple[Optional[str], Optional[str]]:
302
+ first_message: dict[str, any],
303
+ ) -> tuple[str | None, str | None]:
298
304
  """Extract namespace and target agent from the first message topic."""
299
305
  try:
300
306
  topic = first_message.get("topic", "")
@@ -308,7 +314,7 @@ class MessageProcessor:
308
314
  return None, None
309
315
 
310
316
  @staticmethod
311
- def extract_context_id(first_message: Dict[str, Any]) -> Optional[str]:
317
+ def extract_context_id(first_message: dict[str, any]) -> str | None:
312
318
  """Extract context ID from the first message."""
313
319
  try:
314
320
  payload = first_message.get("payload", {})
@@ -320,8 +326,8 @@ class MessageProcessor:
320
326
 
321
327
  @staticmethod
322
328
  def extract_final_status_info(
323
- last_message: Dict[str, Any],
324
- ) -> Tuple[Optional[str], Optional[str]]:
329
+ last_message: dict[str, any],
330
+ ) -> tuple[str | None, str | None]:
325
331
  """Extract final status and message from the last message."""
326
332
  try:
327
333
  payload = last_message.get("payload", {})
@@ -344,7 +350,7 @@ class MessageProcessor:
344
350
  return None, None
345
351
 
346
352
  @staticmethod
347
- def extract_tool_calls(messages: List[Dict[str, Any]]) -> List[ToolCall]:
353
+ def extract_tool_calls(messages: list[dict[str, any]]) -> list[ToolCall]:
348
354
  """Extract all tool calls from messages."""
349
355
  tool_calls = []
350
356
  processed_tool_calls = set()
@@ -381,60 +387,156 @@ class MessageProcessor:
381
387
  class ArtifactService:
382
388
  """Manages artifact discovery, categorization, and metadata."""
383
389
 
384
- def __init__(self, base_path: str, user_identity: str):
385
- self.base_path = base_path
386
- self.user_identity = user_identity
390
+ def __init__(self, config: TestSuiteConfiguration):
391
+ self.config = config
392
+ self.is_remote = config.remote is not None
393
+ if self.is_remote:
394
+ self.base_url = config.remote.environment.get("EVAL_REMOTE_URL")
395
+ self.auth_token = config.remote.environment.get("EVAL_AUTH_TOKEN")
396
+ else:
397
+ self.base_path, self.user_identity = (
398
+ ConfigService.get_local_artifact_config()
399
+ )
400
+
401
+ def get_artifacts(
402
+ self, namespace: str, context_id: str
403
+ ) -> list[ArtifactInfo]:
404
+ """Retrieve artifact information, either locally or from a remote API."""
405
+ if self.is_remote:
406
+ return self._get_remote_artifacts(context_id)
407
+ else:
408
+ return self._get_local_artifacts(namespace, context_id)
409
+
410
+ def _get_remote_artifacts(self, context_id: str) -> list[ArtifactInfo]:
411
+ """Fetch artifacts from the remote API."""
412
+ if not self.base_url:
413
+ return []
414
+
415
+ url = f"{self.base_url}/api/v2/artifacts"
416
+ params = {"session_id": context_id}
417
+
418
+ headers = {"Content-Type": "application/json"}
419
+ if self.auth_token:
420
+ headers["Authorization"] = f"Bearer {self.auth_token}"
421
+ log.info("Auth token found and added to headers.")
422
+ else:
423
+ log.warning("No auth token found for remote artifact request.")
424
+
425
+ log.info(f"Fetching remote artifacts from URL: {url} with params: {params}")
426
+
427
+ try:
428
+ with requests.Session() as session:
429
+ session.headers.update(headers)
430
+ response = session.get(url, params=params, allow_redirects=False)
431
+
432
+ log.info(f"Initial response status: {response.status_code}")
433
+
434
+ # Handle 307 Temporary Redirect manually
435
+ if response.status_code == 307:
436
+ redirect_url = response.headers.get("Location")
437
+ if not redirect_url:
438
+ log.error(
439
+ f"Server sent 307 redirect without a Location header. Full headers: {response.headers}"
440
+ )
441
+ response.raise_for_status() # Re-raise the error to halt execution
442
+
443
+ log.info(f"Handling 307 redirect to: {redirect_url}")
444
+ with requests.Session() as redirect_session:
445
+ redirect_session.headers.update(headers)
446
+ # The redirected URL from the server should be complete, so no params needed.
447
+ response = redirect_session.get(redirect_url)
448
+
449
+ response.raise_for_status()
450
+
451
+ # Handle empty response body after potential redirect
452
+ if not response.text:
453
+ log.info("Received empty response from artifact API, assuming no artifacts.")
454
+ return []
455
+
456
+ artifacts_data = response.json()
457
+
458
+ artifact_infos = []
459
+ for data in artifacts_data:
460
+ # The API returns a flat list of latest versions, so we reconstruct
461
+ # the version list to match the structure ArtifactInfo expects.
462
+ version_info = {
463
+ "version": data.get("version", 0),
464
+ "metadata": {
465
+ "mime_type": data.get("mime_type"),
466
+ "size": data.get("size"),
467
+ "last_modified": data.get("last_modified"),
468
+ "description": data.get("description"),
469
+ "schema": data.get("schema"),
470
+ },
471
+ }
472
+ info = ArtifactInfo(
473
+ artifact_name=data.get("filename"),
474
+ directory="", # Not applicable for remote
475
+ versions=[version_info],
476
+ )
477
+ artifact_infos.append(info)
478
+ return artifact_infos
387
479
 
388
- def get_artifact_info(self, namespace: str, context_id: str) -> List[ArtifactInfo]:
389
- """Retrieve information about artifacts from the session directory."""
480
+ except requests.RequestException as e:
481
+ log.error(f"Failed to fetch remote artifacts: {e}")
482
+ return []
483
+ except json.JSONDecodeError:
484
+ log.error("Failed to decode JSON response from artifact API")
485
+ return []
486
+
487
+ def _get_local_artifacts(
488
+ self, namespace: str, context_id: str
489
+ ) -> list[ArtifactInfo]:
490
+ """Retrieve information about artifacts from the local session directory."""
390
491
  artifact_info = []
391
- session_dir = os.path.join(
392
- self.base_path, namespace, self.user_identity, context_id
492
+ session_dir = (
493
+ Path(self.base_path) / namespace / self.user_identity / context_id
393
494
  )
394
495
 
395
- if not os.path.isdir(session_dir):
496
+ if not session_dir.is_dir():
396
497
  return artifact_info
397
498
 
398
- for item in os.listdir(session_dir):
399
- item_path = os.path.join(session_dir, item)
400
- if os.path.isdir(item_path) and not item.endswith(".metadata.json"):
499
+ for item_path in session_dir.iterdir():
500
+ if item_path.is_dir() and not item_path.name.endswith(".metadata.json"):
401
501
  artifact_info.append(
402
- self._process_artifact_directory(session_dir, item, item_path)
502
+ self._process_artifact_directory(
503
+ session_dir, item_path.name, item_path
504
+ )
403
505
  )
404
506
 
405
507
  return artifact_info
406
508
 
407
509
  def _process_artifact_directory(
408
- self, session_dir: str, artifact_name: str, item_path: str
510
+ self, session_dir: Path, artifact_name: str, item_path: Path
409
511
  ) -> ArtifactInfo:
410
512
  """Process a single artifact directory and extract metadata."""
411
- metadata_dir = os.path.join(session_dir, f"{artifact_name}.metadata.json")
513
+ metadata_dir = session_dir / f"{artifact_name}.metadata.json"
412
514
  versions = []
413
515
 
414
- if os.path.isdir(metadata_dir):
415
- for version_file in os.listdir(item_path):
416
- if not version_file.endswith(".meta"):
417
- version_metadata_path = os.path.join(metadata_dir, version_file)
418
- if os.path.exists(version_metadata_path):
516
+ if metadata_dir.is_dir():
517
+ for version_path in item_path.iterdir():
518
+ if not version_path.name.endswith(".meta"):
519
+ version_metadata_path = metadata_dir / version_path.name
520
+ if version_metadata_path.exists():
419
521
  try:
420
- with open(version_metadata_path, "r") as f:
522
+ with version_metadata_path.open() as f:
421
523
  metadata = json.load(f)
422
524
  versions.append(
423
- {"version": version_file, "metadata": metadata}
525
+ {"version": version_path.name, "metadata": metadata}
424
526
  )
425
527
  except (json.JSONDecodeError, FileNotFoundError):
426
528
  continue
427
529
 
428
530
  return ArtifactInfo(
429
- artifact_name=artifact_name, directory=item_path, versions=versions
531
+ artifact_name=artifact_name, directory=str(item_path), versions=versions
430
532
  )
431
533
 
432
534
  def categorize_artifacts(
433
535
  self,
434
- artifacts: List[ArtifactInfo],
435
- test_case: Dict[str, Any],
436
- tool_calls: List[ToolCall],
437
- ) -> Tuple[List[ArtifactInfo], List[ArtifactInfo]]:
536
+ artifacts: list[ArtifactInfo],
537
+ test_case: dict[str, any],
538
+ tool_calls: list[ToolCall],
539
+ ) -> tuple[list[ArtifactInfo], list[ArtifactInfo]]:
438
540
  """Categorize artifacts into input and output based on test case and tool calls."""
439
541
  input_artifacts = []
440
542
  output_artifacts = []
@@ -463,8 +565,8 @@ class ArtifactService:
463
565
  return input_artifacts, output_artifacts
464
566
 
465
567
  def _create_tool_output_mapping(
466
- self, tool_calls: List[ToolCall]
467
- ) -> Dict[str, ToolCall]:
568
+ self, tool_calls: list[ToolCall]
569
+ ) -> dict[str, ToolCall]:
468
570
  """Create mapping of output filenames to the tools that created them."""
469
571
  tool_output_mapping = {}
470
572
 
@@ -484,7 +586,7 @@ class ArtifactService:
484
586
  return tool_output_mapping
485
587
 
486
588
  def _enhance_input_artifact(
487
- self, artifact: ArtifactInfo, test_case: Dict[str, Any]
589
+ self, artifact: ArtifactInfo, test_case: dict[str, any]
488
590
  ) -> ArtifactInfo:
489
591
  """Enhance input artifact with test case information."""
490
592
  enhanced_artifact = ArtifactInfo(
@@ -500,7 +602,7 @@ class ArtifactService:
500
602
  for tc_artifact in test_case_artifacts:
501
603
  if (
502
604
  tc_artifact.get("type") == "file"
503
- and os.path.basename(tc_artifact["path"]) == artifact.artifact_name
605
+ and Path(tc_artifact["path"]).name == artifact.artifact_name
504
606
  ):
505
607
  enhanced_artifact.artifact_type = tc_artifact["type"]
506
608
  enhanced_artifact.source_path = tc_artifact["path"]
@@ -509,7 +611,7 @@ class ArtifactService:
509
611
  return enhanced_artifact
510
612
 
511
613
  def _enhance_output_artifact(
512
- self, artifact: ArtifactInfo, tool_output_mapping: Dict[str, ToolCall]
614
+ self, artifact: ArtifactInfo, tool_output_mapping: dict[str, ToolCall]
513
615
  ) -> ArtifactInfo:
514
616
  """Enhance output artifact with tool creation information."""
515
617
  enhanced_artifact = ArtifactInfo(
@@ -531,14 +633,15 @@ class ArtifactService:
531
633
  class SummaryBuilder:
532
634
  """Main orchestrator for summary creation."""
533
635
 
534
- def __init__(self):
636
+ def __init__(self, config: TestSuiteConfiguration):
637
+ self.config = config
535
638
  self.file_service = FileService()
536
639
  self.test_case_service = TestCaseService()
537
640
  self.time_processor = TimeProcessor()
538
641
  self.message_processor = MessageProcessor()
539
- self.artifact_service: Optional[ArtifactService] = None
642
+ self.artifact_service = ArtifactService(self.config)
540
643
 
541
- def summarize_run(self, messages_file_path: str) -> Dict[str, Any]:
644
+ def summarize_run(self, messages_file_path: str) -> dict[str, any]:
542
645
  """
543
646
  Create a comprehensive summary of a test run from messages.json file.
544
647
 
@@ -554,8 +657,8 @@ class SummaryBuilder:
554
657
  if not messages:
555
658
  return {}
556
659
 
557
- run_path = os.path.dirname(messages_file_path)
558
- test_case_info_path = os.path.join(run_path, "test_case_info.json")
660
+ run_path = Path(messages_file_path).parent
661
+ test_case_info_path = run_path / "test_case_info.json"
559
662
  test_case_info = self.file_service.load_json(test_case_info_path)
560
663
  test_case_path = test_case_info["path"]
561
664
 
@@ -575,19 +678,19 @@ class SummaryBuilder:
575
678
 
576
679
  except Exception as e:
577
680
  # Return minimal summary with error information
578
- run_path = os.path.dirname(messages_file_path)
681
+ run_path = Path(messages_file_path).parent
579
682
  return {
580
- "test_case_id": os.path.basename(os.path.dirname(run_path)),
581
- "run_id": os.path.basename(run_path),
683
+ "test_case_id": run_path.parent.name,
684
+ "run_id": run_path.name,
582
685
  "errors": [f"Failed to process summary: {str(e)}"],
583
686
  }
584
687
 
585
688
  def _load_and_validate_messages(
586
689
  self, messages_file_path: str
587
- ) -> List[Dict[str, Any]]:
690
+ ) -> list[dict[str, any]]:
588
691
  """Load and validate messages from file."""
589
692
  try:
590
- messages = self.file_service.load_json(messages_file_path)
693
+ messages = self.file_service.load_json(Path(messages_file_path))
591
694
  return messages if isinstance(messages, list) else []
592
695
  except Exception:
593
696
  return []
@@ -596,17 +699,15 @@ class SummaryBuilder:
596
699
  self, messages_file_path: str, test_case_path: str
597
700
  ) -> RunSummary:
598
701
  """Initialize summary with basic path-derived information."""
599
- run_path = os.path.dirname(messages_file_path)
600
- run_id = os.path.basename(run_path)
601
- test_case_id = os.path.splitext(os.path.basename(test_case_path))[0].replace(
602
- ".test", ""
603
- )
702
+ run_path = Path(messages_file_path).parent
703
+ run_id = run_path.name
704
+ test_case_id = Path(test_case_path).stem.replace(".test", "")
604
705
 
605
706
  return RunSummary(test_case_id=test_case_id, run_id=run_id)
606
707
 
607
708
  def _load_test_case(
608
709
  self, summary: RunSummary, test_case_path: str
609
- ) -> Dict[str, Any]:
710
+ ) -> dict[str, any]:
610
711
  """Load test case and update summary with test case info."""
611
712
  test_case = self.test_case_service.load_test_case(test_case_path)
612
713
 
@@ -621,9 +722,9 @@ class SummaryBuilder:
621
722
 
622
723
  def _process_messages(
623
724
  self,
624
- messages: List[Dict[str, Any]],
725
+ messages: list[dict[str, any]],
625
726
  summary: RunSummary,
626
- test_case: Dict[str, Any],
727
+ test_case: dict[str, any],
627
728
  ):
628
729
  """Process all messages to extract relevant information."""
629
730
  if not messages:
@@ -666,8 +767,8 @@ class SummaryBuilder:
666
767
 
667
768
  def _process_time_metrics(
668
769
  self,
669
- first_message: Dict[str, Any],
670
- last_message: Dict[str, Any],
770
+ first_message: dict[str, any],
771
+ last_message: dict[str, any],
671
772
  summary: RunSummary,
672
773
  ):
673
774
  """Process and calculate time metrics."""
@@ -690,19 +791,14 @@ class SummaryBuilder:
690
791
  "Could not parse start or end time to calculate duration."
691
792
  )
692
793
 
693
- def _add_artifact_information(self, summary: RunSummary, test_case: Dict[str, Any]):
794
+ def _add_artifact_information(self, summary: RunSummary, test_case: dict[str, any]):
694
795
  """Add artifact information if configuration is available."""
695
- if not summary.namespace or not summary.context_id:
796
+ if not summary.context_id:
696
797
  return
697
798
 
698
799
  try:
699
- # Initialize artifact service if not already done
700
- if not self.artifact_service:
701
- base_path, user_identity = ConfigService.get_artifact_config()
702
- self.artifact_service = ArtifactService(base_path, user_identity)
703
-
704
800
  # Get and categorize artifacts
705
- all_artifacts = self.artifact_service.get_artifact_info(
801
+ all_artifacts = self.artifact_service.get_artifacts(
706
802
  summary.namespace, summary.context_id
707
803
  )
708
804
 
@@ -719,7 +815,9 @@ class SummaryBuilder:
719
815
  summary.errors.append(f"Could not add artifact info: {str(e)}")
720
816
 
721
817
 
722
- def summarize_run(messages_file_path: str) -> Dict[str, Any]:
818
+ def summarize_run(
819
+ messages_file_path: str, config: TestSuiteConfiguration
820
+ ) -> dict[str, any]:
723
821
  """
724
822
  Main entry point for summarizing a test run.
725
823
 
@@ -728,11 +826,12 @@ def summarize_run(messages_file_path: str) -> Dict[str, Any]:
728
826
 
729
827
  Args:
730
828
  messages_file_path: Path to the messages.json file
829
+ config: The test suite configuration.
731
830
 
732
831
  Returns:
733
832
  Dictionary containing the summarized metrics
734
833
  """
735
- builder = SummaryBuilder()
834
+ builder = SummaryBuilder(config)
736
835
  return builder.summarize_run(messages_file_path)
737
836
 
738
837
 
@@ -740,29 +839,40 @@ def main():
740
839
  """Main entry point for command-line usage."""
741
840
  import sys
742
841
 
842
+ from .shared import EvaluationConfigLoader
843
+
743
844
  if len(sys.argv) != 2:
744
- print("Usage: python summarize_refactored.py <messages_file_path>")
845
+ log.info("Usage: python summarize_refactored.py <messages_file_path>")
745
846
  sys.exit(1)
746
847
 
747
- messages_file_path = sys.argv[1]
848
+ messages_file_path = Path(sys.argv[1])
748
849
 
749
- if not os.path.exists(messages_file_path):
750
- print(f"Error: Messages file not found at: {messages_file_path}")
850
+ if not messages_file_path.exists():
851
+ log.info(f"Error: Messages file not found at: {messages_file_path}")
751
852
  sys.exit(1)
752
853
 
753
854
  try:
855
+ # This main function is for standalone testing. It needs a config.
856
+ # We'll assume a default config for this purpose.
857
+ config_path = Path.cwd() / "tests" / "evaluation" / "config.json"
858
+ if not config_path.exists():
859
+ log.error(f"Default test config not found at {config_path}")
860
+ return
861
+ config_loader = EvaluationConfigLoader(str(config_path))
862
+ config = config_loader.load_configuration()
863
+
754
864
  # Generate summary
755
- summary_data = summarize_run(messages_file_path)
865
+ summary_data = summarize_run(str(messages_file_path), config)
756
866
 
757
867
  # Save summary file
758
- output_dir = os.path.dirname(messages_file_path)
759
- summary_file_path = os.path.join(output_dir, "summary.json")
868
+ output_dir = messages_file_path.parent
869
+ summary_file_path = output_dir / "summary.json"
760
870
 
761
871
  FileService.save_json(summary_data, summary_file_path)
762
- print(f"Summary file created at: {summary_file_path}")
872
+ log.info(f"Summary file created at: {summary_file_path}")
763
873
 
764
874
  except Exception as e:
765
- print(f"Error generating summary: {e}")
875
+ log.error(f"Error generating summary: {e}")
766
876
  sys.exit(1)
767
877
 
768
878