solace-agent-mesh 1.5.1__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of solace-agent-mesh might be problematic. Click here for more details.
- solace_agent_mesh/agent/adk/callbacks.py +0 -5
- solace_agent_mesh/agent/adk/models/lite_llm.py +123 -8
- solace_agent_mesh/agent/adk/models/oauth2_token_manager.py +245 -0
- solace_agent_mesh/agent/protocol/event_handlers.py +40 -1
- solace_agent_mesh/agent/proxies/__init__.py +0 -0
- solace_agent_mesh/agent/proxies/a2a/__init__.py +3 -0
- solace_agent_mesh/agent/proxies/a2a/app.py +55 -0
- solace_agent_mesh/agent/proxies/a2a/component.py +1115 -0
- solace_agent_mesh/agent/proxies/a2a/config.py +140 -0
- solace_agent_mesh/agent/proxies/a2a/oauth_token_cache.py +104 -0
- solace_agent_mesh/agent/proxies/base/__init__.py +3 -0
- solace_agent_mesh/agent/proxies/base/app.py +99 -0
- solace_agent_mesh/agent/proxies/base/component.py +619 -0
- solace_agent_mesh/agent/proxies/base/config.py +85 -0
- solace_agent_mesh/agent/proxies/base/proxy_task_context.py +17 -0
- solace_agent_mesh/agent/sac/app.py +9 -3
- solace_agent_mesh/agent/sac/component.py +160 -8
- solace_agent_mesh/agent/tools/audio_tools.py +125 -8
- solace_agent_mesh/agent/tools/web_tools.py +10 -5
- solace_agent_mesh/agent/utils/artifact_helpers.py +141 -3
- solace_agent_mesh/assets/docs/404.html +3 -3
- solace_agent_mesh/assets/docs/assets/js/5c2bd65f.eda4bcb2.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/6ad8f0bd.f4b15f3b.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/71da7b71.38583438.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/77cf947d.48cb18a2.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/924ffdeb.8095e148.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/9e9d0a82.570c057b.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/{ad71b5ed.60668e9e.js → ad71b5ed.af3ecfd1.js} +1 -1
- solace_agent_mesh/assets/docs/assets/js/ceb2a7a6.5d92d7d0.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/{da0b5bad.9d369087.js → da0b5bad.d08a9466.js} +1 -1
- solace_agent_mesh/assets/docs/assets/js/db924877.e98d12a1.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/de915948.27d6b065.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/e6f9706b.e74a984d.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/f284c35a.42f59cdd.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/ff4d71f2.15b02f97.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/{main.bd3c34f3.js → main.20feee82.js} +2 -2
- solace_agent_mesh/assets/docs/assets/js/runtime~main.0d198646.js +1 -0
- solace_agent_mesh/assets/docs/docs/documentation/components/agents/index.html +15 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/artifact-management/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/audio-tools/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/data-analysis-tools/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/embeds/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/cli/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/gateways/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/orchestrator/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/plugins/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/proxies/index.html +262 -0
- solace_agent_mesh/assets/docs/docs/documentation/deploying/debugging/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/deploying/deployment-options/index.html +31 -3
- solace_agent_mesh/assets/docs/docs/documentation/deploying/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/deploying/observability/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/developing/create-agents/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/create-gateways/index.html +5 -5
- solace_agent_mesh/assets/docs/docs/documentation/developing/creating-python-tools/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/creating-service-providers/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/evaluations/index.html +135 -0
- solace_agent_mesh/assets/docs/docs/documentation/developing/index.html +6 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/structure/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/bedrock-agents/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/custom-agent/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/event-mesh-gateway/index.html +5 -5
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/mcp-integration/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/mongodb-integration/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/rag-integration/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/rest-gateway/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/slack-integration/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/sql-database/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/installation/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/rbac-setup-guide/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/single-sign-on/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/architecture/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/introduction/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/try-agent-mesh/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/configurations/index.html +6 -5
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/installation/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/large_language_models/index.html +100 -3
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/run-project/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/migrations/a2a-upgrade/a2a-gateway-upgrade-to-0.3.0/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/migrations/a2a-upgrade/a2a-technical-migration-map/index.html +3 -3
- solace_agent_mesh/assets/docs/lunr-index-1761165361160.json +1 -0
- solace_agent_mesh/assets/docs/lunr-index.json +1 -1
- solace_agent_mesh/assets/docs/search-doc-1761165361160.json +1 -0
- solace_agent_mesh/assets/docs/search-doc.json +1 -1
- solace_agent_mesh/assets/docs/sitemap.xml +1 -1
- solace_agent_mesh/cli/__init__.py +1 -1
- solace_agent_mesh/cli/commands/add_cmd/agent_cmd.py +2 -69
- solace_agent_mesh/cli/commands/eval_cmd.py +11 -49
- solace_agent_mesh/cli/commands/init_cmd/__init__.py +0 -5
- solace_agent_mesh/cli/commands/init_cmd/env_step.py +10 -12
- solace_agent_mesh/cli/commands/init_cmd/orchestrator_step.py +9 -61
- solace_agent_mesh/cli/commands/init_cmd/webui_gateway_step.py +9 -49
- solace_agent_mesh/cli/commands/plugin_cmd/add_cmd.py +1 -2
- solace_agent_mesh/client/webui/frontend/static/assets/{authCallback-DwrxZE0E.js → authCallback-BTf6dqwp.js} +1 -1
- solace_agent_mesh/client/webui/frontend/static/assets/{client-DarGQzyw.js → client-CaY59VuC.js} +1 -1
- solace_agent_mesh/client/webui/frontend/static/assets/main-BGTaW0uv.js +342 -0
- solace_agent_mesh/client/webui/frontend/static/assets/main-DHJKSW1S.css +1 -0
- solace_agent_mesh/client/webui/frontend/static/assets/{vendor-BKIeiHj_.js → vendor-BEmvJSYz.js} +1 -1
- solace_agent_mesh/client/webui/frontend/static/auth-callback.html +3 -3
- solace_agent_mesh/client/webui/frontend/static/index.html +4 -4
- solace_agent_mesh/common/a2a/__init__.py +24 -0
- solace_agent_mesh/common/a2a/artifact.py +39 -0
- solace_agent_mesh/common/a2a/events.py +29 -0
- solace_agent_mesh/common/a2a/message.py +68 -0
- solace_agent_mesh/common/a2a/protocol.py +73 -1
- solace_agent_mesh/common/agent_registry.py +83 -3
- solace_agent_mesh/common/constants.py +3 -1
- solace_agent_mesh/common/utils/pydantic_utils.py +12 -0
- solace_agent_mesh/config_portal/backend/common.py +1 -1
- solace_agent_mesh/config_portal/frontend/static/client/assets/_index-ByU1X1HD.js +98 -0
- solace_agent_mesh/config_portal/frontend/static/client/assets/{manifest-44d62be6.js → manifest-61038fc6.js} +1 -1
- solace_agent_mesh/config_portal/frontend/static/client/index.html +1 -1
- solace_agent_mesh/evaluation/evaluator.py +128 -104
- solace_agent_mesh/evaluation/message_organizer.py +116 -110
- solace_agent_mesh/evaluation/report_data_processor.py +84 -86
- solace_agent_mesh/evaluation/report_generator.py +73 -79
- solace_agent_mesh/evaluation/run.py +421 -235
- solace_agent_mesh/evaluation/shared/__init__.py +92 -0
- solace_agent_mesh/evaluation/shared/constants.py +47 -0
- solace_agent_mesh/evaluation/shared/exceptions.py +50 -0
- solace_agent_mesh/evaluation/shared/helpers.py +35 -0
- solace_agent_mesh/evaluation/shared/test_case_loader.py +167 -0
- solace_agent_mesh/evaluation/shared/test_suite_loader.py +280 -0
- solace_agent_mesh/evaluation/subscriber.py +111 -232
- solace_agent_mesh/evaluation/summary_builder.py +227 -117
- solace_agent_mesh/gateway/base/app.py +1 -1
- solace_agent_mesh/gateway/base/component.py +8 -1
- solace_agent_mesh/gateway/http_sse/alembic/versions/20251015_add_session_performance_indexes.py +70 -0
- solace_agent_mesh/gateway/http_sse/component.py +98 -2
- solace_agent_mesh/gateway/http_sse/dependencies.py +4 -4
- solace_agent_mesh/gateway/http_sse/main.py +2 -1
- solace_agent_mesh/gateway/http_sse/repository/chat_task_repository.py +12 -13
- solace_agent_mesh/gateway/http_sse/repository/feedback_repository.py +15 -18
- solace_agent_mesh/gateway/http_sse/repository/interfaces.py +25 -18
- solace_agent_mesh/gateway/http_sse/repository/session_repository.py +30 -26
- solace_agent_mesh/gateway/http_sse/repository/task_repository.py +35 -44
- solace_agent_mesh/gateway/http_sse/routers/agent_cards.py +4 -3
- solace_agent_mesh/gateway/http_sse/routers/artifacts.py +95 -203
- solace_agent_mesh/gateway/http_sse/routers/dto/responses/session_responses.py +4 -3
- solace_agent_mesh/gateway/http_sse/routers/sessions.py +2 -2
- solace_agent_mesh/gateway/http_sse/routers/tasks.py +33 -41
- solace_agent_mesh/gateway/http_sse/routers/visualization.py +17 -11
- solace_agent_mesh/gateway/http_sse/services/data_retention_service.py +4 -4
- solace_agent_mesh/gateway/http_sse/services/feedback_service.py +51 -43
- solace_agent_mesh/gateway/http_sse/services/session_service.py +20 -20
- solace_agent_mesh/gateway/http_sse/services/task_logger_service.py +8 -8
- solace_agent_mesh/gateway/http_sse/shared/base_repository.py +45 -71
- solace_agent_mesh/gateway/http_sse/shared/types.py +0 -18
- solace_agent_mesh/templates/gateway_config_template.yaml +0 -5
- solace_agent_mesh/templates/logging_config_template.ini +10 -6
- solace_agent_mesh/templates/plugin_gateway_config_template.yaml +0 -3
- solace_agent_mesh/templates/shared_config.yaml +40 -0
- {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.0.dist-info}/METADATA +47 -21
- {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.0.dist-info}/RECORD +162 -141
- solace_agent_mesh/assets/docs/assets/js/5c2bd65f.e49689dd.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/6ad8f0bd.39d5851d.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/71da7b71.804d6567.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/77cf947d.64c9bd6c.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/9e9d0a82.dd810042.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/db924877.cbc66f02.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/de915948.139b4b9c.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/e6f9706b.582a78ca.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/f284c35a.5766a13d.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/ff4d71f2.9c0297a6.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/runtime~main.18dc45dd.js +0 -1
- solace_agent_mesh/assets/docs/lunr-index-1760121512891.json +0 -1
- solace_agent_mesh/assets/docs/search-doc-1760121512891.json +0 -1
- solace_agent_mesh/client/webui/frontend/static/assets/main-2nd1gbaH.js +0 -339
- solace_agent_mesh/client/webui/frontend/static/assets/main-DoKXctCM.css +0 -1
- solace_agent_mesh/config_portal/frontend/static/client/assets/_index-BNuqpWDc.js +0 -98
- solace_agent_mesh/evaluation/config_loader.py +0 -657
- solace_agent_mesh/evaluation/test_case_loader.py +0 -714
- /solace_agent_mesh/assets/docs/assets/js/{main.bd3c34f3.js.LICENSE.txt → main.20feee82.js.LICENSE.txt} +0 -0
- {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.0.dist-info}/WHEEL +0 -0
- {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.0.dist-info}/entry_points.txt +0 -0
- {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,14 +4,18 @@ This module processes test run messages and generates comprehensive summaries.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
import json
|
|
7
|
-
import
|
|
7
|
+
import logging
|
|
8
8
|
import re
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import requests
|
|
9
14
|
import yaml
|
|
10
15
|
|
|
11
|
-
from
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
from .test_case_loader import load_test_case
|
|
16
|
+
from .shared import TestSuiteConfiguration, load_test_case
|
|
17
|
+
|
|
18
|
+
log = logging.getLogger(__name__)
|
|
15
19
|
|
|
16
20
|
|
|
17
21
|
@dataclass
|
|
@@ -21,7 +25,7 @@ class ToolCall:
|
|
|
21
25
|
call_id: str
|
|
22
26
|
agent: str
|
|
23
27
|
tool_name: str
|
|
24
|
-
arguments:
|
|
28
|
+
arguments: dict[str, any]
|
|
25
29
|
timestamp: str
|
|
26
30
|
|
|
27
31
|
|
|
@@ -31,21 +35,21 @@ class ArtifactInfo:
|
|
|
31
35
|
|
|
32
36
|
artifact_name: str
|
|
33
37
|
directory: str
|
|
34
|
-
versions:
|
|
35
|
-
artifact_type:
|
|
36
|
-
source_path:
|
|
37
|
-
created_by_tool:
|
|
38
|
-
created_by_call_id:
|
|
39
|
-
creation_timestamp:
|
|
38
|
+
versions: list[dict[str, any]]
|
|
39
|
+
artifact_type: str | None = None
|
|
40
|
+
source_path: str | None = None
|
|
41
|
+
created_by_tool: str | None = None
|
|
42
|
+
created_by_call_id: str | None = None
|
|
43
|
+
creation_timestamp: str | None = None
|
|
40
44
|
|
|
41
45
|
|
|
42
46
|
@dataclass
|
|
43
47
|
class TimeMetrics:
|
|
44
48
|
"""Time-related metrics for a test run."""
|
|
45
49
|
|
|
46
|
-
start_time:
|
|
47
|
-
end_time:
|
|
48
|
-
duration_seconds:
|
|
50
|
+
start_time: str | None = None
|
|
51
|
+
end_time: str | None = None
|
|
52
|
+
duration_seconds: float | None = None
|
|
49
53
|
|
|
50
54
|
|
|
51
55
|
@dataclass
|
|
@@ -61,12 +65,12 @@ class RunSummary:
|
|
|
61
65
|
final_status: str = ""
|
|
62
66
|
final_message: str = ""
|
|
63
67
|
time_metrics: TimeMetrics = field(default_factory=TimeMetrics)
|
|
64
|
-
tool_calls:
|
|
65
|
-
input_artifacts:
|
|
66
|
-
output_artifacts:
|
|
67
|
-
errors:
|
|
68
|
+
tool_calls: list[ToolCall] = field(default_factory=list)
|
|
69
|
+
input_artifacts: list[ArtifactInfo] = field(default_factory=list)
|
|
70
|
+
output_artifacts: list[ArtifactInfo] = field(default_factory=list)
|
|
71
|
+
errors: list[str] = field(default_factory=list)
|
|
68
72
|
|
|
69
|
-
def to_dict(self) ->
|
|
73
|
+
def to_dict(self) -> dict[str, any]:
|
|
70
74
|
"""Convert summary to dictionary format for JSON serialization."""
|
|
71
75
|
return {
|
|
72
76
|
"test_case_id": self.test_case_id,
|
|
@@ -115,16 +119,16 @@ class RunSummary:
|
|
|
115
119
|
class ConfigService:
|
|
116
120
|
"""Handles configuration loading and YAML processing."""
|
|
117
121
|
|
|
118
|
-
_config_cache:
|
|
122
|
+
_config_cache: dict[str, any] = {}
|
|
119
123
|
|
|
120
124
|
@classmethod
|
|
121
|
-
def load_yaml_with_includes(cls, file_path: str) ->
|
|
125
|
+
def load_yaml_with_includes(cls, file_path: str) -> dict[str, any]:
|
|
122
126
|
"""Load YAML file with !include directive processing and caching."""
|
|
123
127
|
if file_path in cls._config_cache:
|
|
124
128
|
return cls._config_cache[file_path]
|
|
125
129
|
|
|
126
130
|
try:
|
|
127
|
-
with open(file_path
|
|
131
|
+
with open(file_path) as f:
|
|
128
132
|
content = f.read()
|
|
129
133
|
|
|
130
134
|
content = cls._process_includes(content, file_path)
|
|
@@ -133,17 +137,18 @@ class ConfigService:
|
|
|
133
137
|
return config
|
|
134
138
|
|
|
135
139
|
except (FileNotFoundError, yaml.YAMLError) as e:
|
|
136
|
-
raise ValueError(f"Failed to load YAML config from {file_path}: {e}")
|
|
140
|
+
raise ValueError(f"Failed to load YAML config from {file_path}: {e}") from e
|
|
137
141
|
|
|
138
142
|
@staticmethod
|
|
139
143
|
def _process_includes(content: str, base_file_path: str) -> str:
|
|
140
144
|
"""Process !include directives in YAML content."""
|
|
141
145
|
include_pattern = re.compile(r"^\s*!include\s+(.*)$", re.MULTILINE)
|
|
146
|
+
base_dir = Path(base_file_path).parent
|
|
142
147
|
|
|
143
148
|
def replacer(match):
|
|
144
|
-
|
|
145
|
-
include_path =
|
|
146
|
-
with open(
|
|
149
|
+
include_path_str = match.group(1).strip()
|
|
150
|
+
include_path = base_dir / include_path_str
|
|
151
|
+
with include_path.open() as inc_f:
|
|
147
152
|
return inc_f.read()
|
|
148
153
|
|
|
149
154
|
# Repeatedly replace includes until none are left
|
|
@@ -153,7 +158,7 @@ class ConfigService:
|
|
|
153
158
|
return content
|
|
154
159
|
|
|
155
160
|
@classmethod
|
|
156
|
-
def
|
|
161
|
+
def get_local_artifact_config(cls) -> tuple[str, str]:
|
|
157
162
|
"""Get artifact service configuration from eval backend config."""
|
|
158
163
|
try:
|
|
159
164
|
webui_config = cls.load_yaml_with_includes("configs/eval_backend.yaml")
|
|
@@ -171,36 +176,37 @@ class ConfigService:
|
|
|
171
176
|
raise ValueError("Could not find 'a2a_eval_backend_app' config")
|
|
172
177
|
|
|
173
178
|
except Exception as e:
|
|
174
|
-
raise ValueError(f"Failed to load artifact configuration: {e}")
|
|
179
|
+
raise ValueError(f"Failed to load artifact configuration: {e}") from e
|
|
175
180
|
|
|
176
181
|
|
|
177
182
|
class FileService:
|
|
178
183
|
"""Handles file operations and path management."""
|
|
179
184
|
|
|
180
185
|
@staticmethod
|
|
181
|
-
def load_json(filepath:
|
|
186
|
+
def load_json(filepath: Path) -> any:
|
|
182
187
|
"""Load JSON data from file."""
|
|
183
188
|
try:
|
|
184
|
-
with open(
|
|
189
|
+
with filepath.open() as f:
|
|
185
190
|
return json.load(f)
|
|
186
191
|
except (FileNotFoundError, json.JSONDecodeError) as e:
|
|
187
|
-
raise ValueError(f"Failed to load JSON from {filepath}: {e}")
|
|
192
|
+
raise ValueError(f"Failed to load JSON from {filepath}: {e}") from e
|
|
188
193
|
|
|
189
194
|
@staticmethod
|
|
190
|
-
def save_json(data:
|
|
195
|
+
def save_json(data: any, filepath: Path):
|
|
191
196
|
"""Save data as JSON to file."""
|
|
192
197
|
try:
|
|
193
|
-
|
|
198
|
+
filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
199
|
+
with filepath.open("w") as f:
|
|
194
200
|
json.dump(data, f, indent=2)
|
|
195
201
|
except Exception as e:
|
|
196
|
-
raise ValueError(f"Failed to save JSON to {filepath}: {e}")
|
|
202
|
+
raise ValueError(f"Failed to save JSON to {filepath}: {e}") from e
|
|
197
203
|
|
|
198
204
|
|
|
199
205
|
class TestCaseService:
|
|
200
206
|
"""Handles test case loading and validation."""
|
|
201
207
|
|
|
202
208
|
@staticmethod
|
|
203
|
-
def load_test_case(test_case_id: str) ->
|
|
209
|
+
def load_test_case(test_case_id: str) -> dict[str, any] | None:
|
|
204
210
|
"""Load test case definition with error handling."""
|
|
205
211
|
try:
|
|
206
212
|
return load_test_case(test_case_id)
|
|
@@ -208,7 +214,7 @@ class TestCaseService:
|
|
|
208
214
|
return None
|
|
209
215
|
|
|
210
216
|
@staticmethod
|
|
211
|
-
def extract_input_artifact_names(test_case:
|
|
217
|
+
def extract_input_artifact_names(test_case: dict[str, any]) -> set[str]:
|
|
212
218
|
"""Extract input artifact names from test case definition."""
|
|
213
219
|
input_artifact_names = set()
|
|
214
220
|
test_case_artifacts = test_case.get("artifacts", [])
|
|
@@ -216,7 +222,7 @@ class TestCaseService:
|
|
|
216
222
|
for tc_artifact in test_case_artifacts:
|
|
217
223
|
if tc_artifact.get("type") == "file" and "path" in tc_artifact:
|
|
218
224
|
# Extract filename from path (e.g., "artifacts/sample.csv" -> "sample.csv")
|
|
219
|
-
artifact_name =
|
|
225
|
+
artifact_name = Path(tc_artifact["path"]).name
|
|
220
226
|
input_artifact_names.add(artifact_name)
|
|
221
227
|
|
|
222
228
|
return input_artifact_names
|
|
@@ -226,7 +232,7 @@ class TimeProcessor:
|
|
|
226
232
|
"""Handles timestamp parsing and duration calculations."""
|
|
227
233
|
|
|
228
234
|
@staticmethod
|
|
229
|
-
def extract_start_time(first_message:
|
|
235
|
+
def extract_start_time(first_message: dict[str, any]) -> str | None:
|
|
230
236
|
"""Extract start time from the first message."""
|
|
231
237
|
try:
|
|
232
238
|
payload = first_message.get("payload", {})
|
|
@@ -250,7 +256,7 @@ class TimeProcessor:
|
|
|
250
256
|
return None
|
|
251
257
|
|
|
252
258
|
@staticmethod
|
|
253
|
-
def extract_end_time(last_message:
|
|
259
|
+
def extract_end_time(last_message: dict[str, any]) -> str | None:
|
|
254
260
|
"""Extract end time from the last message."""
|
|
255
261
|
try:
|
|
256
262
|
payload = last_message.get("payload", {})
|
|
@@ -263,7 +269,7 @@ class TimeProcessor:
|
|
|
263
269
|
@staticmethod
|
|
264
270
|
def calculate_duration(
|
|
265
271
|
start_time_str: str, end_time_str: str
|
|
266
|
-
) ->
|
|
272
|
+
) -> tuple[float | None, str | None]:
|
|
267
273
|
"""Calculate duration and return normalized start time."""
|
|
268
274
|
try:
|
|
269
275
|
start_time = datetime.fromisoformat(start_time_str)
|
|
@@ -293,8 +299,8 @@ class MessageProcessor:
|
|
|
293
299
|
|
|
294
300
|
@staticmethod
|
|
295
301
|
def extract_namespace_and_agent(
|
|
296
|
-
first_message:
|
|
297
|
-
) ->
|
|
302
|
+
first_message: dict[str, any],
|
|
303
|
+
) -> tuple[str | None, str | None]:
|
|
298
304
|
"""Extract namespace and target agent from the first message topic."""
|
|
299
305
|
try:
|
|
300
306
|
topic = first_message.get("topic", "")
|
|
@@ -308,7 +314,7 @@ class MessageProcessor:
|
|
|
308
314
|
return None, None
|
|
309
315
|
|
|
310
316
|
@staticmethod
|
|
311
|
-
def extract_context_id(first_message:
|
|
317
|
+
def extract_context_id(first_message: dict[str, any]) -> str | None:
|
|
312
318
|
"""Extract context ID from the first message."""
|
|
313
319
|
try:
|
|
314
320
|
payload = first_message.get("payload", {})
|
|
@@ -320,8 +326,8 @@ class MessageProcessor:
|
|
|
320
326
|
|
|
321
327
|
@staticmethod
|
|
322
328
|
def extract_final_status_info(
|
|
323
|
-
last_message:
|
|
324
|
-
) ->
|
|
329
|
+
last_message: dict[str, any],
|
|
330
|
+
) -> tuple[str | None, str | None]:
|
|
325
331
|
"""Extract final status and message from the last message."""
|
|
326
332
|
try:
|
|
327
333
|
payload = last_message.get("payload", {})
|
|
@@ -344,7 +350,7 @@ class MessageProcessor:
|
|
|
344
350
|
return None, None
|
|
345
351
|
|
|
346
352
|
@staticmethod
|
|
347
|
-
def extract_tool_calls(messages:
|
|
353
|
+
def extract_tool_calls(messages: list[dict[str, any]]) -> list[ToolCall]:
|
|
348
354
|
"""Extract all tool calls from messages."""
|
|
349
355
|
tool_calls = []
|
|
350
356
|
processed_tool_calls = set()
|
|
@@ -381,60 +387,156 @@ class MessageProcessor:
|
|
|
381
387
|
class ArtifactService:
|
|
382
388
|
"""Manages artifact discovery, categorization, and metadata."""
|
|
383
389
|
|
|
384
|
-
def __init__(self,
|
|
385
|
-
self.
|
|
386
|
-
self.
|
|
390
|
+
def __init__(self, config: TestSuiteConfiguration):
|
|
391
|
+
self.config = config
|
|
392
|
+
self.is_remote = config.remote is not None
|
|
393
|
+
if self.is_remote:
|
|
394
|
+
self.base_url = config.remote.environment.get("EVAL_REMOTE_URL")
|
|
395
|
+
self.auth_token = config.remote.environment.get("EVAL_AUTH_TOKEN")
|
|
396
|
+
else:
|
|
397
|
+
self.base_path, self.user_identity = (
|
|
398
|
+
ConfigService.get_local_artifact_config()
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
def get_artifacts(
|
|
402
|
+
self, namespace: str, context_id: str
|
|
403
|
+
) -> list[ArtifactInfo]:
|
|
404
|
+
"""Retrieve artifact information, either locally or from a remote API."""
|
|
405
|
+
if self.is_remote:
|
|
406
|
+
return self._get_remote_artifacts(context_id)
|
|
407
|
+
else:
|
|
408
|
+
return self._get_local_artifacts(namespace, context_id)
|
|
409
|
+
|
|
410
|
+
def _get_remote_artifacts(self, context_id: str) -> list[ArtifactInfo]:
|
|
411
|
+
"""Fetch artifacts from the remote API."""
|
|
412
|
+
if not self.base_url:
|
|
413
|
+
return []
|
|
414
|
+
|
|
415
|
+
url = f"{self.base_url}/api/v2/artifacts"
|
|
416
|
+
params = {"session_id": context_id}
|
|
417
|
+
|
|
418
|
+
headers = {"Content-Type": "application/json"}
|
|
419
|
+
if self.auth_token:
|
|
420
|
+
headers["Authorization"] = f"Bearer {self.auth_token}"
|
|
421
|
+
log.info("Auth token found and added to headers.")
|
|
422
|
+
else:
|
|
423
|
+
log.warning("No auth token found for remote artifact request.")
|
|
424
|
+
|
|
425
|
+
log.info(f"Fetching remote artifacts from URL: {url} with params: {params}")
|
|
426
|
+
|
|
427
|
+
try:
|
|
428
|
+
with requests.Session() as session:
|
|
429
|
+
session.headers.update(headers)
|
|
430
|
+
response = session.get(url, params=params, allow_redirects=False)
|
|
431
|
+
|
|
432
|
+
log.info(f"Initial response status: {response.status_code}")
|
|
433
|
+
|
|
434
|
+
# Handle 307 Temporary Redirect manually
|
|
435
|
+
if response.status_code == 307:
|
|
436
|
+
redirect_url = response.headers.get("Location")
|
|
437
|
+
if not redirect_url:
|
|
438
|
+
log.error(
|
|
439
|
+
f"Server sent 307 redirect without a Location header. Full headers: {response.headers}"
|
|
440
|
+
)
|
|
441
|
+
response.raise_for_status() # Re-raise the error to halt execution
|
|
442
|
+
|
|
443
|
+
log.info(f"Handling 307 redirect to: {redirect_url}")
|
|
444
|
+
with requests.Session() as redirect_session:
|
|
445
|
+
redirect_session.headers.update(headers)
|
|
446
|
+
# The redirected URL from the server should be complete, so no params needed.
|
|
447
|
+
response = redirect_session.get(redirect_url)
|
|
448
|
+
|
|
449
|
+
response.raise_for_status()
|
|
450
|
+
|
|
451
|
+
# Handle empty response body after potential redirect
|
|
452
|
+
if not response.text:
|
|
453
|
+
log.info("Received empty response from artifact API, assuming no artifacts.")
|
|
454
|
+
return []
|
|
455
|
+
|
|
456
|
+
artifacts_data = response.json()
|
|
457
|
+
|
|
458
|
+
artifact_infos = []
|
|
459
|
+
for data in artifacts_data:
|
|
460
|
+
# The API returns a flat list of latest versions, so we reconstruct
|
|
461
|
+
# the version list to match the structure ArtifactInfo expects.
|
|
462
|
+
version_info = {
|
|
463
|
+
"version": data.get("version", 0),
|
|
464
|
+
"metadata": {
|
|
465
|
+
"mime_type": data.get("mime_type"),
|
|
466
|
+
"size": data.get("size"),
|
|
467
|
+
"last_modified": data.get("last_modified"),
|
|
468
|
+
"description": data.get("description"),
|
|
469
|
+
"schema": data.get("schema"),
|
|
470
|
+
},
|
|
471
|
+
}
|
|
472
|
+
info = ArtifactInfo(
|
|
473
|
+
artifact_name=data.get("filename"),
|
|
474
|
+
directory="", # Not applicable for remote
|
|
475
|
+
versions=[version_info],
|
|
476
|
+
)
|
|
477
|
+
artifact_infos.append(info)
|
|
478
|
+
return artifact_infos
|
|
387
479
|
|
|
388
|
-
|
|
389
|
-
|
|
480
|
+
except requests.RequestException as e:
|
|
481
|
+
log.error(f"Failed to fetch remote artifacts: {e}")
|
|
482
|
+
return []
|
|
483
|
+
except json.JSONDecodeError:
|
|
484
|
+
log.error("Failed to decode JSON response from artifact API")
|
|
485
|
+
return []
|
|
486
|
+
|
|
487
|
+
def _get_local_artifacts(
|
|
488
|
+
self, namespace: str, context_id: str
|
|
489
|
+
) -> list[ArtifactInfo]:
|
|
490
|
+
"""Retrieve information about artifacts from the local session directory."""
|
|
390
491
|
artifact_info = []
|
|
391
|
-
session_dir =
|
|
392
|
-
self.base_path
|
|
492
|
+
session_dir = (
|
|
493
|
+
Path(self.base_path) / namespace / self.user_identity / context_id
|
|
393
494
|
)
|
|
394
495
|
|
|
395
|
-
if not
|
|
496
|
+
if not session_dir.is_dir():
|
|
396
497
|
return artifact_info
|
|
397
498
|
|
|
398
|
-
for
|
|
399
|
-
item_path
|
|
400
|
-
if os.path.isdir(item_path) and not item.endswith(".metadata.json"):
|
|
499
|
+
for item_path in session_dir.iterdir():
|
|
500
|
+
if item_path.is_dir() and not item_path.name.endswith(".metadata.json"):
|
|
401
501
|
artifact_info.append(
|
|
402
|
-
self._process_artifact_directory(
|
|
502
|
+
self._process_artifact_directory(
|
|
503
|
+
session_dir, item_path.name, item_path
|
|
504
|
+
)
|
|
403
505
|
)
|
|
404
506
|
|
|
405
507
|
return artifact_info
|
|
406
508
|
|
|
407
509
|
def _process_artifact_directory(
|
|
408
|
-
self, session_dir:
|
|
510
|
+
self, session_dir: Path, artifact_name: str, item_path: Path
|
|
409
511
|
) -> ArtifactInfo:
|
|
410
512
|
"""Process a single artifact directory and extract metadata."""
|
|
411
|
-
metadata_dir =
|
|
513
|
+
metadata_dir = session_dir / f"{artifact_name}.metadata.json"
|
|
412
514
|
versions = []
|
|
413
515
|
|
|
414
|
-
if
|
|
415
|
-
for
|
|
416
|
-
if not
|
|
417
|
-
version_metadata_path =
|
|
418
|
-
if
|
|
516
|
+
if metadata_dir.is_dir():
|
|
517
|
+
for version_path in item_path.iterdir():
|
|
518
|
+
if not version_path.name.endswith(".meta"):
|
|
519
|
+
version_metadata_path = metadata_dir / version_path.name
|
|
520
|
+
if version_metadata_path.exists():
|
|
419
521
|
try:
|
|
420
|
-
with open(
|
|
522
|
+
with version_metadata_path.open() as f:
|
|
421
523
|
metadata = json.load(f)
|
|
422
524
|
versions.append(
|
|
423
|
-
{"version":
|
|
525
|
+
{"version": version_path.name, "metadata": metadata}
|
|
424
526
|
)
|
|
425
527
|
except (json.JSONDecodeError, FileNotFoundError):
|
|
426
528
|
continue
|
|
427
529
|
|
|
428
530
|
return ArtifactInfo(
|
|
429
|
-
artifact_name=artifact_name, directory=item_path, versions=versions
|
|
531
|
+
artifact_name=artifact_name, directory=str(item_path), versions=versions
|
|
430
532
|
)
|
|
431
533
|
|
|
432
534
|
def categorize_artifacts(
|
|
433
535
|
self,
|
|
434
|
-
artifacts:
|
|
435
|
-
test_case:
|
|
436
|
-
tool_calls:
|
|
437
|
-
) ->
|
|
536
|
+
artifacts: list[ArtifactInfo],
|
|
537
|
+
test_case: dict[str, any],
|
|
538
|
+
tool_calls: list[ToolCall],
|
|
539
|
+
) -> tuple[list[ArtifactInfo], list[ArtifactInfo]]:
|
|
438
540
|
"""Categorize artifacts into input and output based on test case and tool calls."""
|
|
439
541
|
input_artifacts = []
|
|
440
542
|
output_artifacts = []
|
|
@@ -463,8 +565,8 @@ class ArtifactService:
|
|
|
463
565
|
return input_artifacts, output_artifacts
|
|
464
566
|
|
|
465
567
|
def _create_tool_output_mapping(
|
|
466
|
-
self, tool_calls:
|
|
467
|
-
) ->
|
|
568
|
+
self, tool_calls: list[ToolCall]
|
|
569
|
+
) -> dict[str, ToolCall]:
|
|
468
570
|
"""Create mapping of output filenames to the tools that created them."""
|
|
469
571
|
tool_output_mapping = {}
|
|
470
572
|
|
|
@@ -484,7 +586,7 @@ class ArtifactService:
|
|
|
484
586
|
return tool_output_mapping
|
|
485
587
|
|
|
486
588
|
def _enhance_input_artifact(
|
|
487
|
-
self, artifact: ArtifactInfo, test_case:
|
|
589
|
+
self, artifact: ArtifactInfo, test_case: dict[str, any]
|
|
488
590
|
) -> ArtifactInfo:
|
|
489
591
|
"""Enhance input artifact with test case information."""
|
|
490
592
|
enhanced_artifact = ArtifactInfo(
|
|
@@ -500,7 +602,7 @@ class ArtifactService:
|
|
|
500
602
|
for tc_artifact in test_case_artifacts:
|
|
501
603
|
if (
|
|
502
604
|
tc_artifact.get("type") == "file"
|
|
503
|
-
and
|
|
605
|
+
and Path(tc_artifact["path"]).name == artifact.artifact_name
|
|
504
606
|
):
|
|
505
607
|
enhanced_artifact.artifact_type = tc_artifact["type"]
|
|
506
608
|
enhanced_artifact.source_path = tc_artifact["path"]
|
|
@@ -509,7 +611,7 @@ class ArtifactService:
|
|
|
509
611
|
return enhanced_artifact
|
|
510
612
|
|
|
511
613
|
def _enhance_output_artifact(
|
|
512
|
-
self, artifact: ArtifactInfo, tool_output_mapping:
|
|
614
|
+
self, artifact: ArtifactInfo, tool_output_mapping: dict[str, ToolCall]
|
|
513
615
|
) -> ArtifactInfo:
|
|
514
616
|
"""Enhance output artifact with tool creation information."""
|
|
515
617
|
enhanced_artifact = ArtifactInfo(
|
|
@@ -531,14 +633,15 @@ class ArtifactService:
|
|
|
531
633
|
class SummaryBuilder:
|
|
532
634
|
"""Main orchestrator for summary creation."""
|
|
533
635
|
|
|
534
|
-
def __init__(self):
|
|
636
|
+
def __init__(self, config: TestSuiteConfiguration):
|
|
637
|
+
self.config = config
|
|
535
638
|
self.file_service = FileService()
|
|
536
639
|
self.test_case_service = TestCaseService()
|
|
537
640
|
self.time_processor = TimeProcessor()
|
|
538
641
|
self.message_processor = MessageProcessor()
|
|
539
|
-
self.artifact_service
|
|
642
|
+
self.artifact_service = ArtifactService(self.config)
|
|
540
643
|
|
|
541
|
-
def summarize_run(self, messages_file_path: str) ->
|
|
644
|
+
def summarize_run(self, messages_file_path: str) -> dict[str, any]:
|
|
542
645
|
"""
|
|
543
646
|
Create a comprehensive summary of a test run from messages.json file.
|
|
544
647
|
|
|
@@ -554,8 +657,8 @@ class SummaryBuilder:
|
|
|
554
657
|
if not messages:
|
|
555
658
|
return {}
|
|
556
659
|
|
|
557
|
-
run_path =
|
|
558
|
-
test_case_info_path =
|
|
660
|
+
run_path = Path(messages_file_path).parent
|
|
661
|
+
test_case_info_path = run_path / "test_case_info.json"
|
|
559
662
|
test_case_info = self.file_service.load_json(test_case_info_path)
|
|
560
663
|
test_case_path = test_case_info["path"]
|
|
561
664
|
|
|
@@ -575,19 +678,19 @@ class SummaryBuilder:
|
|
|
575
678
|
|
|
576
679
|
except Exception as e:
|
|
577
680
|
# Return minimal summary with error information
|
|
578
|
-
run_path =
|
|
681
|
+
run_path = Path(messages_file_path).parent
|
|
579
682
|
return {
|
|
580
|
-
"test_case_id":
|
|
581
|
-
"run_id":
|
|
683
|
+
"test_case_id": run_path.parent.name,
|
|
684
|
+
"run_id": run_path.name,
|
|
582
685
|
"errors": [f"Failed to process summary: {str(e)}"],
|
|
583
686
|
}
|
|
584
687
|
|
|
585
688
|
def _load_and_validate_messages(
|
|
586
689
|
self, messages_file_path: str
|
|
587
|
-
) ->
|
|
690
|
+
) -> list[dict[str, any]]:
|
|
588
691
|
"""Load and validate messages from file."""
|
|
589
692
|
try:
|
|
590
|
-
messages = self.file_service.load_json(messages_file_path)
|
|
693
|
+
messages = self.file_service.load_json(Path(messages_file_path))
|
|
591
694
|
return messages if isinstance(messages, list) else []
|
|
592
695
|
except Exception:
|
|
593
696
|
return []
|
|
@@ -596,17 +699,15 @@ class SummaryBuilder:
|
|
|
596
699
|
self, messages_file_path: str, test_case_path: str
|
|
597
700
|
) -> RunSummary:
|
|
598
701
|
"""Initialize summary with basic path-derived information."""
|
|
599
|
-
run_path =
|
|
600
|
-
run_id =
|
|
601
|
-
test_case_id =
|
|
602
|
-
".test", ""
|
|
603
|
-
)
|
|
702
|
+
run_path = Path(messages_file_path).parent
|
|
703
|
+
run_id = run_path.name
|
|
704
|
+
test_case_id = Path(test_case_path).stem.replace(".test", "")
|
|
604
705
|
|
|
605
706
|
return RunSummary(test_case_id=test_case_id, run_id=run_id)
|
|
606
707
|
|
|
607
708
|
def _load_test_case(
|
|
608
709
|
self, summary: RunSummary, test_case_path: str
|
|
609
|
-
) ->
|
|
710
|
+
) -> dict[str, any]:
|
|
610
711
|
"""Load test case and update summary with test case info."""
|
|
611
712
|
test_case = self.test_case_service.load_test_case(test_case_path)
|
|
612
713
|
|
|
@@ -621,9 +722,9 @@ class SummaryBuilder:
|
|
|
621
722
|
|
|
622
723
|
def _process_messages(
|
|
623
724
|
self,
|
|
624
|
-
messages:
|
|
725
|
+
messages: list[dict[str, any]],
|
|
625
726
|
summary: RunSummary,
|
|
626
|
-
test_case:
|
|
727
|
+
test_case: dict[str, any],
|
|
627
728
|
):
|
|
628
729
|
"""Process all messages to extract relevant information."""
|
|
629
730
|
if not messages:
|
|
@@ -666,8 +767,8 @@ class SummaryBuilder:
|
|
|
666
767
|
|
|
667
768
|
def _process_time_metrics(
|
|
668
769
|
self,
|
|
669
|
-
first_message:
|
|
670
|
-
last_message:
|
|
770
|
+
first_message: dict[str, any],
|
|
771
|
+
last_message: dict[str, any],
|
|
671
772
|
summary: RunSummary,
|
|
672
773
|
):
|
|
673
774
|
"""Process and calculate time metrics."""
|
|
@@ -690,19 +791,14 @@ class SummaryBuilder:
|
|
|
690
791
|
"Could not parse start or end time to calculate duration."
|
|
691
792
|
)
|
|
692
793
|
|
|
693
|
-
def _add_artifact_information(self, summary: RunSummary, test_case:
|
|
794
|
+
def _add_artifact_information(self, summary: RunSummary, test_case: dict[str, any]):
|
|
694
795
|
"""Add artifact information if configuration is available."""
|
|
695
|
-
if not summary.
|
|
796
|
+
if not summary.context_id:
|
|
696
797
|
return
|
|
697
798
|
|
|
698
799
|
try:
|
|
699
|
-
# Initialize artifact service if not already done
|
|
700
|
-
if not self.artifact_service:
|
|
701
|
-
base_path, user_identity = ConfigService.get_artifact_config()
|
|
702
|
-
self.artifact_service = ArtifactService(base_path, user_identity)
|
|
703
|
-
|
|
704
800
|
# Get and categorize artifacts
|
|
705
|
-
all_artifacts = self.artifact_service.
|
|
801
|
+
all_artifacts = self.artifact_service.get_artifacts(
|
|
706
802
|
summary.namespace, summary.context_id
|
|
707
803
|
)
|
|
708
804
|
|
|
@@ -719,7 +815,9 @@ class SummaryBuilder:
|
|
|
719
815
|
summary.errors.append(f"Could not add artifact info: {str(e)}")
|
|
720
816
|
|
|
721
817
|
|
|
722
|
-
def summarize_run(
|
|
818
|
+
def summarize_run(
|
|
819
|
+
messages_file_path: str, config: TestSuiteConfiguration
|
|
820
|
+
) -> dict[str, any]:
|
|
723
821
|
"""
|
|
724
822
|
Main entry point for summarizing a test run.
|
|
725
823
|
|
|
@@ -728,11 +826,12 @@ def summarize_run(messages_file_path: str) -> Dict[str, Any]:
|
|
|
728
826
|
|
|
729
827
|
Args:
|
|
730
828
|
messages_file_path: Path to the messages.json file
|
|
829
|
+
config: The test suite configuration.
|
|
731
830
|
|
|
732
831
|
Returns:
|
|
733
832
|
Dictionary containing the summarized metrics
|
|
734
833
|
"""
|
|
735
|
-
builder = SummaryBuilder()
|
|
834
|
+
builder = SummaryBuilder(config)
|
|
736
835
|
return builder.summarize_run(messages_file_path)
|
|
737
836
|
|
|
738
837
|
|
|
@@ -740,29 +839,40 @@ def main():
|
|
|
740
839
|
"""Main entry point for command-line usage."""
|
|
741
840
|
import sys
|
|
742
841
|
|
|
842
|
+
from .shared import EvaluationConfigLoader
|
|
843
|
+
|
|
743
844
|
if len(sys.argv) != 2:
|
|
744
|
-
|
|
845
|
+
log.info("Usage: python summarize_refactored.py <messages_file_path>")
|
|
745
846
|
sys.exit(1)
|
|
746
847
|
|
|
747
|
-
messages_file_path = sys.argv[1]
|
|
848
|
+
messages_file_path = Path(sys.argv[1])
|
|
748
849
|
|
|
749
|
-
if not
|
|
750
|
-
|
|
850
|
+
if not messages_file_path.exists():
|
|
851
|
+
log.info(f"Error: Messages file not found at: {messages_file_path}")
|
|
751
852
|
sys.exit(1)
|
|
752
853
|
|
|
753
854
|
try:
|
|
855
|
+
# This main function is for standalone testing. It needs a config.
|
|
856
|
+
# We'll assume a default config for this purpose.
|
|
857
|
+
config_path = Path.cwd() / "tests" / "evaluation" / "config.json"
|
|
858
|
+
if not config_path.exists():
|
|
859
|
+
log.error(f"Default test config not found at {config_path}")
|
|
860
|
+
return
|
|
861
|
+
config_loader = EvaluationConfigLoader(str(config_path))
|
|
862
|
+
config = config_loader.load_configuration()
|
|
863
|
+
|
|
754
864
|
# Generate summary
|
|
755
|
-
summary_data = summarize_run(messages_file_path)
|
|
865
|
+
summary_data = summarize_run(str(messages_file_path), config)
|
|
756
866
|
|
|
757
867
|
# Save summary file
|
|
758
|
-
output_dir =
|
|
759
|
-
summary_file_path =
|
|
868
|
+
output_dir = messages_file_path.parent
|
|
869
|
+
summary_file_path = output_dir / "summary.json"
|
|
760
870
|
|
|
761
871
|
FileService.save_json(summary_data, summary_file_path)
|
|
762
|
-
|
|
872
|
+
log.info(f"Summary file created at: {summary_file_path}")
|
|
763
873
|
|
|
764
874
|
except Exception as e:
|
|
765
|
-
|
|
875
|
+
log.error(f"Error generating summary: {e}")
|
|
766
876
|
sys.exit(1)
|
|
767
877
|
|
|
768
878
|
|