solace-agent-mesh 1.5.1__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of solace-agent-mesh might be problematic. Click here for more details.
- solace_agent_mesh/agent/adk/callbacks.py +0 -5
- solace_agent_mesh/agent/adk/models/lite_llm.py +123 -8
- solace_agent_mesh/agent/adk/models/oauth2_token_manager.py +245 -0
- solace_agent_mesh/agent/protocol/event_handlers.py +213 -31
- solace_agent_mesh/agent/proxies/__init__.py +0 -0
- solace_agent_mesh/agent/proxies/a2a/__init__.py +3 -0
- solace_agent_mesh/agent/proxies/a2a/app.py +55 -0
- solace_agent_mesh/agent/proxies/a2a/component.py +1115 -0
- solace_agent_mesh/agent/proxies/a2a/config.py +140 -0
- solace_agent_mesh/agent/proxies/a2a/oauth_token_cache.py +104 -0
- solace_agent_mesh/agent/proxies/base/__init__.py +3 -0
- solace_agent_mesh/agent/proxies/base/app.py +99 -0
- solace_agent_mesh/agent/proxies/base/component.py +650 -0
- solace_agent_mesh/agent/proxies/base/config.py +85 -0
- solace_agent_mesh/agent/proxies/base/proxy_task_context.py +17 -0
- solace_agent_mesh/agent/sac/app.py +58 -5
- solace_agent_mesh/agent/sac/component.py +238 -75
- solace_agent_mesh/agent/sac/task_execution_context.py +46 -0
- solace_agent_mesh/agent/tools/audio_tools.py +125 -8
- solace_agent_mesh/agent/tools/web_tools.py +10 -5
- solace_agent_mesh/agent/utils/artifact_helpers.py +141 -3
- solace_agent_mesh/assets/docs/404.html +3 -3
- solace_agent_mesh/assets/docs/assets/js/5c2bd65f.eda4bcb2.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/6ad8f0bd.f4b15f3b.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/71da7b71.38583438.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/77cf947d.48cb18a2.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/924ffdeb.8095e148.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/9e9d0a82.570c057b.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/{ad71b5ed.60668e9e.js → ad71b5ed.af3ecfd1.js} +1 -1
- solace_agent_mesh/assets/docs/assets/js/ceb2a7a6.5d92d7d0.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/{da0b5bad.9d369087.js → da0b5bad.d08a9466.js} +1 -1
- solace_agent_mesh/assets/docs/assets/js/db924877.e98d12a1.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/de915948.27d6b065.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/{e3d9abda.2b916f9e.js → e3d9abda.6b9493d0.js} +1 -1
- solace_agent_mesh/assets/docs/assets/js/e6f9706b.e74a984d.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/f284c35a.42f59cdd.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/ff4d71f2.15b02f97.js +1 -0
- solace_agent_mesh/assets/docs/assets/js/{main.bd3c34f3.js → main.b12eac43.js} +2 -2
- solace_agent_mesh/assets/docs/assets/js/runtime~main.e268214e.js +1 -0
- solace_agent_mesh/assets/docs/docs/documentation/components/agents/index.html +15 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/artifact-management/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/audio-tools/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/data-analysis-tools/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/embeds/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/builtin-tools/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/cli/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/gateways/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/orchestrator/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/plugins/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/components/proxies/index.html +262 -0
- solace_agent_mesh/assets/docs/docs/documentation/deploying/debugging/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/deploying/deployment-options/index.html +31 -3
- solace_agent_mesh/assets/docs/docs/documentation/deploying/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/deploying/observability/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/developing/create-agents/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/create-gateways/index.html +5 -5
- solace_agent_mesh/assets/docs/docs/documentation/developing/creating-python-tools/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/creating-service-providers/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/evaluations/index.html +135 -0
- solace_agent_mesh/assets/docs/docs/documentation/developing/index.html +6 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/structure/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/bedrock-agents/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/custom-agent/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/event-mesh-gateway/index.html +5 -5
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/mcp-integration/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/mongodb-integration/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/rag-integration/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/rest-gateway/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/slack-integration/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/developing/tutorials/sql-database/index.html +4 -4
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/installation/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/rbac-setup-guide/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/enterprise/single-sign-on/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/architecture/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/introduction/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/getting-started/try-agent-mesh/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/configurations/index.html +6 -5
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/installation/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/large_language_models/index.html +100 -3
- solace_agent_mesh/assets/docs/docs/documentation/installing-and-configuring/run-project/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/migrations/a2a-upgrade/a2a-gateway-upgrade-to-0.3.0/index.html +3 -3
- solace_agent_mesh/assets/docs/docs/documentation/migrations/a2a-upgrade/a2a-technical-migration-map/index.html +3 -3
- solace_agent_mesh/assets/docs/lunr-index-1761248203150.json +1 -0
- solace_agent_mesh/assets/docs/lunr-index.json +1 -1
- solace_agent_mesh/assets/docs/search-doc-1761248203150.json +1 -0
- solace_agent_mesh/assets/docs/search-doc.json +1 -1
- solace_agent_mesh/assets/docs/sitemap.xml +1 -1
- solace_agent_mesh/cli/__init__.py +1 -1
- solace_agent_mesh/cli/commands/add_cmd/agent_cmd.py +2 -69
- solace_agent_mesh/cli/commands/eval_cmd.py +11 -49
- solace_agent_mesh/cli/commands/init_cmd/__init__.py +0 -5
- solace_agent_mesh/cli/commands/init_cmd/env_step.py +10 -12
- solace_agent_mesh/cli/commands/init_cmd/orchestrator_step.py +9 -61
- solace_agent_mesh/cli/commands/init_cmd/webui_gateway_step.py +9 -49
- solace_agent_mesh/cli/commands/plugin_cmd/add_cmd.py +1 -2
- solace_agent_mesh/client/webui/frontend/static/assets/{authCallback-DwrxZE0E.js → authCallback-BTf6dqwp.js} +1 -1
- solace_agent_mesh/client/webui/frontend/static/assets/{client-DarGQzyw.js → client-CaY59VuC.js} +1 -1
- solace_agent_mesh/client/webui/frontend/static/assets/main-B32noGmR.js +342 -0
- solace_agent_mesh/client/webui/frontend/static/assets/main-DHJKSW1S.css +1 -0
- solace_agent_mesh/client/webui/frontend/static/assets/{vendor-BKIeiHj_.js → vendor-BEmvJSYz.js} +1 -1
- solace_agent_mesh/client/webui/frontend/static/auth-callback.html +3 -3
- solace_agent_mesh/client/webui/frontend/static/index.html +4 -4
- solace_agent_mesh/common/a2a/__init__.py +24 -0
- solace_agent_mesh/common/a2a/artifact.py +39 -0
- solace_agent_mesh/common/a2a/events.py +29 -0
- solace_agent_mesh/common/a2a/message.py +68 -0
- solace_agent_mesh/common/a2a/protocol.py +151 -1
- solace_agent_mesh/common/agent_registry.py +83 -3
- solace_agent_mesh/common/constants.py +3 -1
- solace_agent_mesh/common/sac/sam_component_base.py +383 -4
- solace_agent_mesh/common/utils/pydantic_utils.py +12 -0
- solace_agent_mesh/config_portal/backend/common.py +1 -1
- solace_agent_mesh/config_portal/frontend/static/client/assets/_index-ByU1X1HD.js +98 -0
- solace_agent_mesh/config_portal/frontend/static/client/assets/{manifest-44d62be6.js → manifest-61038fc6.js} +1 -1
- solace_agent_mesh/config_portal/frontend/static/client/index.html +1 -1
- solace_agent_mesh/evaluation/evaluator.py +128 -104
- solace_agent_mesh/evaluation/message_organizer.py +116 -110
- solace_agent_mesh/evaluation/report_data_processor.py +84 -86
- solace_agent_mesh/evaluation/report_generator.py +73 -79
- solace_agent_mesh/evaluation/run.py +421 -235
- solace_agent_mesh/evaluation/shared/__init__.py +92 -0
- solace_agent_mesh/evaluation/shared/constants.py +47 -0
- solace_agent_mesh/evaluation/shared/exceptions.py +50 -0
- solace_agent_mesh/evaluation/shared/helpers.py +35 -0
- solace_agent_mesh/evaluation/shared/test_case_loader.py +167 -0
- solace_agent_mesh/evaluation/shared/test_suite_loader.py +280 -0
- solace_agent_mesh/evaluation/subscriber.py +111 -232
- solace_agent_mesh/evaluation/summary_builder.py +227 -117
- solace_agent_mesh/gateway/base/app.py +16 -1
- solace_agent_mesh/gateway/base/component.py +112 -39
- solace_agent_mesh/gateway/http_sse/alembic/versions/20251015_add_session_performance_indexes.py +70 -0
- solace_agent_mesh/gateway/http_sse/component.py +99 -3
- solace_agent_mesh/gateway/http_sse/dependencies.py +4 -4
- solace_agent_mesh/gateway/http_sse/main.py +1 -0
- solace_agent_mesh/gateway/http_sse/repository/chat_task_repository.py +12 -13
- solace_agent_mesh/gateway/http_sse/repository/feedback_repository.py +15 -18
- solace_agent_mesh/gateway/http_sse/repository/interfaces.py +25 -18
- solace_agent_mesh/gateway/http_sse/repository/session_repository.py +30 -26
- solace_agent_mesh/gateway/http_sse/repository/task_repository.py +35 -44
- solace_agent_mesh/gateway/http_sse/routers/agent_cards.py +4 -3
- solace_agent_mesh/gateway/http_sse/routers/artifacts.py +95 -203
- solace_agent_mesh/gateway/http_sse/routers/dto/responses/session_responses.py +4 -3
- solace_agent_mesh/gateway/http_sse/routers/sessions.py +2 -2
- solace_agent_mesh/gateway/http_sse/routers/tasks.py +33 -41
- solace_agent_mesh/gateway/http_sse/routers/users.py +47 -1
- solace_agent_mesh/gateway/http_sse/routers/visualization.py +17 -11
- solace_agent_mesh/gateway/http_sse/services/data_retention_service.py +4 -4
- solace_agent_mesh/gateway/http_sse/services/feedback_service.py +51 -43
- solace_agent_mesh/gateway/http_sse/services/session_service.py +20 -20
- solace_agent_mesh/gateway/http_sse/services/task_logger_service.py +8 -8
- solace_agent_mesh/gateway/http_sse/shared/base_repository.py +45 -71
- solace_agent_mesh/gateway/http_sse/shared/types.py +0 -18
- solace_agent_mesh/templates/gateway_config_template.yaml +0 -5
- solace_agent_mesh/templates/logging_config_template.ini +10 -6
- solace_agent_mesh/templates/plugin_gateway_config_template.yaml +0 -3
- solace_agent_mesh/templates/shared_config.yaml +40 -0
- {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.1.dist-info}/METADATA +47 -21
- {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.1.dist-info}/RECORD +166 -145
- solace_agent_mesh/assets/docs/assets/js/5c2bd65f.e49689dd.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/6ad8f0bd.39d5851d.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/71da7b71.804d6567.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/77cf947d.64c9bd6c.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/9e9d0a82.dd810042.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/db924877.cbc66f02.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/de915948.139b4b9c.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/e6f9706b.582a78ca.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/f284c35a.5766a13d.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/ff4d71f2.9c0297a6.js +0 -1
- solace_agent_mesh/assets/docs/assets/js/runtime~main.18dc45dd.js +0 -1
- solace_agent_mesh/assets/docs/lunr-index-1760121512891.json +0 -1
- solace_agent_mesh/assets/docs/search-doc-1760121512891.json +0 -1
- solace_agent_mesh/client/webui/frontend/static/assets/main-2nd1gbaH.js +0 -339
- solace_agent_mesh/client/webui/frontend/static/assets/main-DoKXctCM.css +0 -1
- solace_agent_mesh/config_portal/frontend/static/client/assets/_index-BNuqpWDc.js +0 -98
- solace_agent_mesh/evaluation/config_loader.py +0 -657
- solace_agent_mesh/evaluation/test_case_loader.py +0 -714
- /solace_agent_mesh/assets/docs/assets/js/{main.bd3c34f3.js.LICENSE.txt → main.b12eac43.js.LICENSE.txt} +0 -0
- {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.1.dist-info}/WHEEL +0 -0
- {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.1.dist-info}/entry_points.txt +0 -0
- {solace_agent_mesh-1.5.1.dist-info → solace_agent_mesh-1.6.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,27 +5,23 @@ This module extracts and processes evaluation data for HTML report generation.
|
|
|
5
5
|
|
|
6
6
|
import json
|
|
7
7
|
import logging
|
|
8
|
-
|
|
8
|
+
import random
|
|
9
|
+
from collections import Counter, defaultdict
|
|
9
10
|
from dataclasses import dataclass, field
|
|
10
|
-
from
|
|
11
|
+
from datetime import datetime
|
|
11
12
|
from pathlib import Path
|
|
12
|
-
from collections import defaultdict, Counter
|
|
13
|
-
import random
|
|
14
13
|
|
|
15
|
-
|
|
16
|
-
from .test_case_loader import load_test_case
|
|
14
|
+
from .shared import load_test_case
|
|
17
15
|
|
|
18
|
-
|
|
19
|
-
logging.basicConfig(level=logging.INFO)
|
|
20
|
-
logger = logging.getLogger(__name__)
|
|
16
|
+
log = logging.getLogger(__name__)
|
|
21
17
|
|
|
22
18
|
|
|
23
19
|
@dataclass
|
|
24
20
|
class EvaluationMetrics:
|
|
25
21
|
"""Core evaluation data structure."""
|
|
26
22
|
|
|
27
|
-
models:
|
|
28
|
-
total_execution_time:
|
|
23
|
+
models: list[str] = field(default_factory=list)
|
|
24
|
+
total_execution_time: float | None = None
|
|
29
25
|
total_execution_time_formatted: str = "Not available"
|
|
30
26
|
generation_time: str = field(
|
|
31
27
|
default_factory=lambda: datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
@@ -36,7 +32,7 @@ class EvaluationMetrics:
|
|
|
36
32
|
runs: str = "Not available"
|
|
37
33
|
total_tests: int = 0
|
|
38
34
|
duration: str = "Not available"
|
|
39
|
-
test_case_names:
|
|
35
|
+
test_case_names: list[str] = field(default_factory=list)
|
|
40
36
|
|
|
41
37
|
|
|
42
38
|
@dataclass
|
|
@@ -48,7 +44,7 @@ class ModelPerformance:
|
|
|
48
44
|
success_rate: float = 0.0
|
|
49
45
|
test_count: int = 0
|
|
50
46
|
estimated_cost: float = 0.0
|
|
51
|
-
scores:
|
|
47
|
+
scores: list[float] = field(default_factory=list)
|
|
52
48
|
|
|
53
49
|
|
|
54
50
|
@dataclass
|
|
@@ -58,7 +54,7 @@ class TestCaseResult:
|
|
|
58
54
|
test_case_id: str
|
|
59
55
|
category: str
|
|
60
56
|
description: str = ""
|
|
61
|
-
model_results:
|
|
57
|
+
model_results: dict[str, any] = field(default_factory=dict)
|
|
62
58
|
average_score: float = 0.0
|
|
63
59
|
|
|
64
60
|
|
|
@@ -66,9 +62,9 @@ class TestCaseResult:
|
|
|
66
62
|
class ChartConfiguration:
|
|
67
63
|
"""Chart and visualization data."""
|
|
68
64
|
|
|
69
|
-
categories:
|
|
70
|
-
datasets:
|
|
71
|
-
category_scores:
|
|
65
|
+
categories: list[str] = field(default_factory=list)
|
|
66
|
+
datasets: list[dict[str, any]] = field(default_factory=list)
|
|
67
|
+
category_scores: dict[str, dict[str, float]] = field(default_factory=dict)
|
|
72
68
|
|
|
73
69
|
|
|
74
70
|
@dataclass
|
|
@@ -76,27 +72,27 @@ class CategoryStatistics:
|
|
|
76
72
|
"""Category-based statistics."""
|
|
77
73
|
|
|
78
74
|
category_name: str
|
|
79
|
-
test_cases:
|
|
80
|
-
model_scores:
|
|
75
|
+
test_cases: list[str] = field(default_factory=list)
|
|
76
|
+
model_scores: dict[str, float] = field(default_factory=dict)
|
|
81
77
|
|
|
82
78
|
|
|
83
79
|
class FileService:
|
|
84
80
|
"""Handles file I/O operations with proper error handling."""
|
|
85
81
|
|
|
86
82
|
@staticmethod
|
|
87
|
-
def load_json(filepath: Path) ->
|
|
83
|
+
def load_json(filepath: Path) -> any:
|
|
88
84
|
"""Load JSON data from file."""
|
|
89
85
|
try:
|
|
90
|
-
with open(filepath
|
|
86
|
+
with open(filepath) as f:
|
|
91
87
|
return json.load(f)
|
|
92
88
|
except FileNotFoundError:
|
|
93
|
-
|
|
89
|
+
log.warning(f"File not found: {filepath}")
|
|
94
90
|
return None
|
|
95
91
|
except json.JSONDecodeError as e:
|
|
96
|
-
|
|
92
|
+
log.error(f"Invalid JSON in file {filepath}: {e}")
|
|
97
93
|
return None
|
|
98
94
|
except Exception as e:
|
|
99
|
-
|
|
95
|
+
log.error(f"Error reading file {filepath}: {e}")
|
|
100
96
|
return None
|
|
101
97
|
|
|
102
98
|
@staticmethod
|
|
@@ -105,7 +101,7 @@ class FileService:
|
|
|
105
101
|
return filepath.exists() and filepath.is_file()
|
|
106
102
|
|
|
107
103
|
@staticmethod
|
|
108
|
-
def list_directories(path: Path) ->
|
|
104
|
+
def list_directories(path: Path) -> list[str]:
|
|
109
105
|
"""List directories in the given path."""
|
|
110
106
|
try:
|
|
111
107
|
return [
|
|
@@ -114,7 +110,7 @@ class FileService:
|
|
|
114
110
|
if item.is_dir() and not item.name.startswith(".")
|
|
115
111
|
]
|
|
116
112
|
except Exception as e:
|
|
117
|
-
|
|
113
|
+
log.error(f"Error listing directories in {path}: {e}")
|
|
118
114
|
return []
|
|
119
115
|
|
|
120
116
|
|
|
@@ -124,7 +120,7 @@ class ResultsExtractionService:
|
|
|
124
120
|
def __init__(self, file_service: FileService):
|
|
125
121
|
self.file_service = file_service
|
|
126
122
|
|
|
127
|
-
def extract_model_results(self, results_dir: Path) ->
|
|
123
|
+
def extract_model_results(self, results_dir: Path) -> dict[str, any]:
|
|
128
124
|
"""Extract results for all models."""
|
|
129
125
|
model_results = {}
|
|
130
126
|
|
|
@@ -136,22 +132,22 @@ class ResultsExtractionService:
|
|
|
136
132
|
results_data = self.file_service.load_json(results_file)
|
|
137
133
|
if results_data:
|
|
138
134
|
model_results[model_name] = results_data
|
|
139
|
-
|
|
135
|
+
log.debug(f"Loaded results for model: {model_name}")
|
|
140
136
|
|
|
141
|
-
|
|
137
|
+
log.info(f"Extracted results for {len(model_results)} models")
|
|
142
138
|
return model_results
|
|
143
139
|
|
|
144
|
-
def extract_execution_stats(self, results_dir: Path) ->
|
|
140
|
+
def extract_execution_stats(self, results_dir: Path) -> dict[str, any] | None:
|
|
145
141
|
"""Extract execution statistics."""
|
|
146
142
|
stats_file = results_dir / "stats.json"
|
|
147
143
|
|
|
148
144
|
if self.file_service.file_exists(stats_file):
|
|
149
145
|
stats_data = self.file_service.load_json(stats_file)
|
|
150
146
|
if stats_data:
|
|
151
|
-
|
|
147
|
+
log.debug("Loaded execution statistics")
|
|
152
148
|
return stats_data
|
|
153
149
|
|
|
154
|
-
|
|
150
|
+
log.warning("No execution statistics found")
|
|
155
151
|
return None
|
|
156
152
|
|
|
157
153
|
|
|
@@ -160,7 +156,7 @@ class MetricsCalculationService:
|
|
|
160
156
|
|
|
161
157
|
@staticmethod
|
|
162
158
|
def calculate_model_performance(
|
|
163
|
-
model_name: str, results_data:
|
|
159
|
+
model_name: str, results_data: dict[str, any]
|
|
164
160
|
) -> ModelPerformance:
|
|
165
161
|
"""Calculate performance metrics for a single model."""
|
|
166
162
|
performance = ModelPerformance(model_name=model_name)
|
|
@@ -199,7 +195,7 @@ class MetricsCalculationService:
|
|
|
199
195
|
return performance
|
|
200
196
|
|
|
201
197
|
@staticmethod
|
|
202
|
-
def format_execution_time(total_time: float) ->
|
|
198
|
+
def format_execution_time(total_time: float) -> tuple[str, str]:
|
|
203
199
|
"""Format execution time into readable strings."""
|
|
204
200
|
minutes = int(total_time // 60)
|
|
205
201
|
seconds = int(total_time % 60)
|
|
@@ -208,12 +204,12 @@ class MetricsCalculationService:
|
|
|
208
204
|
return formatted, duration
|
|
209
205
|
|
|
210
206
|
@staticmethod
|
|
211
|
-
def calculate_run_statistics(model_results:
|
|
207
|
+
def calculate_run_statistics(model_results: dict[str, any]) -> tuple[int, str]:
|
|
212
208
|
"""Calculate run statistics from model results."""
|
|
213
209
|
test_cases = set()
|
|
214
210
|
all_run_counts = []
|
|
215
211
|
|
|
216
|
-
for
|
|
212
|
+
for _model_name, results in model_results.items():
|
|
217
213
|
if "test_cases" in results:
|
|
218
214
|
for test_case in results["test_cases"]:
|
|
219
215
|
test_case_id = test_case.get("test_case_id")
|
|
@@ -248,7 +244,7 @@ class ChartDataService:
|
|
|
248
244
|
self.file_service = file_service
|
|
249
245
|
|
|
250
246
|
def generate_chart_configuration(
|
|
251
|
-
self, model_results:
|
|
247
|
+
self, model_results: dict[str, any], test_cases: dict[str, dict[str, any]]
|
|
252
248
|
) -> ChartConfiguration:
|
|
253
249
|
"""Generate chart configuration data."""
|
|
254
250
|
chart_config = ChartConfiguration()
|
|
@@ -263,7 +259,7 @@ class ChartDataService:
|
|
|
263
259
|
|
|
264
260
|
# Prepare chart data
|
|
265
261
|
if category_scores:
|
|
266
|
-
chart_config.categories = sorted(
|
|
262
|
+
chart_config.categories = sorted(category_scores.keys())
|
|
267
263
|
chart_config.category_scores = category_scores
|
|
268
264
|
chart_config.datasets = self._generate_chart_datasets(
|
|
269
265
|
category_scores, model_results
|
|
@@ -272,12 +268,12 @@ class ChartDataService:
|
|
|
272
268
|
return chart_config
|
|
273
269
|
|
|
274
270
|
def _extract_category_mapping(
|
|
275
|
-
self, model_results:
|
|
276
|
-
) ->
|
|
271
|
+
self, model_results: dict[str, any]
|
|
272
|
+
) -> dict[str, set[str]]:
|
|
277
273
|
"""Extract category to test case mapping."""
|
|
278
274
|
category_test_mapping = defaultdict(set)
|
|
279
275
|
|
|
280
|
-
for
|
|
276
|
+
for _model_name, results in model_results.items():
|
|
281
277
|
if "test_cases" in results:
|
|
282
278
|
for test_case in results["test_cases"]:
|
|
283
279
|
test_id = test_case.get("test_case_id")
|
|
@@ -287,22 +283,22 @@ class ChartDataService:
|
|
|
287
283
|
|
|
288
284
|
# Convert sets to sorted lists
|
|
289
285
|
return {
|
|
290
|
-
cat: sorted(
|
|
286
|
+
cat: sorted(tests) for cat, tests in category_test_mapping.items()
|
|
291
287
|
}
|
|
292
288
|
|
|
293
289
|
def _calculate_category_scores(
|
|
294
290
|
self,
|
|
295
|
-
category_test_mapping:
|
|
296
|
-
test_cases:
|
|
297
|
-
model_results:
|
|
298
|
-
) ->
|
|
291
|
+
category_test_mapping: dict[str, list[str]],
|
|
292
|
+
test_cases: dict[str, dict[str, any]],
|
|
293
|
+
model_results: dict[str, any],
|
|
294
|
+
) -> dict[str, dict[str, float]]:
|
|
299
295
|
"""Calculate average scores by category for each model."""
|
|
300
296
|
category_scores = {}
|
|
301
297
|
|
|
302
298
|
for category, test_names in category_test_mapping.items():
|
|
303
299
|
category_scores[category] = {}
|
|
304
300
|
|
|
305
|
-
for model_name in model_results
|
|
301
|
+
for model_name in model_results:
|
|
306
302
|
scores = []
|
|
307
303
|
|
|
308
304
|
# Collect scores for this category and model
|
|
@@ -334,9 +330,9 @@ class ChartDataService:
|
|
|
334
330
|
|
|
335
331
|
def _generate_chart_datasets(
|
|
336
332
|
self,
|
|
337
|
-
category_scores:
|
|
338
|
-
model_results:
|
|
339
|
-
) ->
|
|
333
|
+
category_scores: dict[str, dict[str, float]],
|
|
334
|
+
model_results: dict[str, any],
|
|
335
|
+
) -> list[dict[str, any]]:
|
|
340
336
|
"""Generate chart datasets for visualization."""
|
|
341
337
|
# Enhanced model colors with better contrast
|
|
342
338
|
model_colors = {
|
|
@@ -352,7 +348,7 @@ class ChartDataService:
|
|
|
352
348
|
}
|
|
353
349
|
|
|
354
350
|
chart_datasets = []
|
|
355
|
-
categories = sorted(
|
|
351
|
+
categories = sorted(category_scores.keys())
|
|
356
352
|
|
|
357
353
|
for model_name in sorted(model_results.keys()):
|
|
358
354
|
model_data = []
|
|
@@ -363,8 +359,10 @@ class ChartDataService:
|
|
|
363
359
|
color = model_colors.get(model_name)
|
|
364
360
|
if color is None:
|
|
365
361
|
# Generate a random color if not in the predefined list
|
|
366
|
-
|
|
367
|
-
|
|
362
|
+
def generate_random_component():
|
|
363
|
+
return random.randint(0, 255)
|
|
364
|
+
|
|
365
|
+
color = f"#{generate_random_component():02x}{generate_random_component():02x}{generate_random_component():02x}"
|
|
368
366
|
|
|
369
367
|
chart_datasets.append(
|
|
370
368
|
{
|
|
@@ -388,8 +386,8 @@ class ModalDataService:
|
|
|
388
386
|
self.file_service = file_service
|
|
389
387
|
|
|
390
388
|
def generate_modal_test_data(
|
|
391
|
-
self, test_case_id: str, model_results:
|
|
392
|
-
) ->
|
|
389
|
+
self, test_case_id: str, model_results: dict[str, any]
|
|
390
|
+
) -> dict[str, any]:
|
|
393
391
|
"""Generate test data for modal JavaScript consumption."""
|
|
394
392
|
modal_data = {"model_scores": {}, "tool_scores": {}, "individual_runs": {}}
|
|
395
393
|
|
|
@@ -464,7 +462,7 @@ class TemplateDataService:
|
|
|
464
462
|
self.modal_service = ModalDataService(file_service)
|
|
465
463
|
|
|
466
464
|
def generate_performance_metrics_table(
|
|
467
|
-
self, model_performances:
|
|
465
|
+
self, model_performances: dict[str, ModelPerformance]
|
|
468
466
|
) -> str:
|
|
469
467
|
"""Generate HTML table rows for performance metrics."""
|
|
470
468
|
metrics_rows = []
|
|
@@ -489,9 +487,9 @@ class TemplateDataService:
|
|
|
489
487
|
|
|
490
488
|
def generate_breakdown_content(
|
|
491
489
|
self,
|
|
492
|
-
test_case_results:
|
|
493
|
-
model_performances:
|
|
494
|
-
model_results:
|
|
490
|
+
test_case_results: list[TestCaseResult],
|
|
491
|
+
model_performances: dict[str, ModelPerformance],
|
|
492
|
+
model_results: dict[str, any] = None,
|
|
495
493
|
) -> str:
|
|
496
494
|
"""Generate detailed breakdown content by category with modal support."""
|
|
497
495
|
# Group test cases by category
|
|
@@ -507,7 +505,7 @@ class TemplateDataService:
|
|
|
507
505
|
for test_result in test_results:
|
|
508
506
|
test_scores = []
|
|
509
507
|
|
|
510
|
-
for model_name,
|
|
508
|
+
for model_name, _performance in model_performances.items():
|
|
511
509
|
if test_result.test_case_id in test_result.model_results:
|
|
512
510
|
model_data = test_result.model_results[
|
|
513
511
|
test_result.test_case_id
|
|
@@ -576,7 +574,7 @@ class TemplateDataService:
|
|
|
576
574
|
|
|
577
575
|
category_tests.append(
|
|
578
576
|
f"""
|
|
579
|
-
<div class="test-item"
|
|
577
|
+
<div class="test-item"
|
|
580
578
|
data-test-name="{test_result.test_case_id}"
|
|
581
579
|
data-test-description="{test_result.description}"
|
|
582
580
|
data-test-data="{modal_data_json}">
|
|
@@ -609,7 +607,7 @@ class TemplateDataService:
|
|
|
609
607
|
|
|
610
608
|
return "".join(breakdown_sections)
|
|
611
609
|
|
|
612
|
-
def generate_model_execution_times(self, model_results:
|
|
610
|
+
def generate_model_execution_times(self, model_results: dict[str, any]) -> str:
|
|
613
611
|
"""Generate model execution times HTML."""
|
|
614
612
|
execution_times_html = []
|
|
615
613
|
|
|
@@ -649,8 +647,8 @@ class TemplateDataService:
|
|
|
649
647
|
return "".join(execution_times_html)
|
|
650
648
|
|
|
651
649
|
def calculate_best_worst_tests(
|
|
652
|
-
self, test_case_results:
|
|
653
|
-
) ->
|
|
650
|
+
self, test_case_results: list[TestCaseResult]
|
|
651
|
+
) -> tuple[str, str]:
|
|
654
652
|
"""Calculate best and worst performing tests."""
|
|
655
653
|
test_averages = {}
|
|
656
654
|
|
|
@@ -670,7 +668,7 @@ class TemplateDataService:
|
|
|
670
668
|
return "Not available", "Not available"
|
|
671
669
|
|
|
672
670
|
def calculate_average_time(
|
|
673
|
-
self, model_performances:
|
|
671
|
+
self, model_performances: dict[str, ModelPerformance]
|
|
674
672
|
) -> str:
|
|
675
673
|
"""Calculate overall average time."""
|
|
676
674
|
all_durations = []
|
|
@@ -703,8 +701,8 @@ class ModelResultsProcessor:
|
|
|
703
701
|
self.file_service = file_service
|
|
704
702
|
|
|
705
703
|
def organize_test_cases(
|
|
706
|
-
self, model_results:
|
|
707
|
-
) ->
|
|
704
|
+
self, model_results: dict[str, any]
|
|
705
|
+
) -> dict[str, dict[str, any]]:
|
|
708
706
|
"""Organize test cases by test case ID and model."""
|
|
709
707
|
test_cases = {}
|
|
710
708
|
|
|
@@ -720,8 +718,8 @@ class ModelResultsProcessor:
|
|
|
720
718
|
return test_cases
|
|
721
719
|
|
|
722
720
|
def create_test_case_results(
|
|
723
|
-
self, test_cases:
|
|
724
|
-
) ->
|
|
721
|
+
self, test_cases: dict[str, dict[str, any]]
|
|
722
|
+
) -> list[TestCaseResult]:
|
|
725
723
|
"""Create TestCaseResult objects from organized test cases."""
|
|
726
724
|
test_case_results = []
|
|
727
725
|
|
|
@@ -778,9 +776,9 @@ class ReportDataProcessor:
|
|
|
778
776
|
self.template_service = TemplateDataService(self.file_service)
|
|
779
777
|
self.processor = ModelResultsProcessor(self.file_service)
|
|
780
778
|
|
|
781
|
-
def get_evaluation_data(self, results_dir: Path) ->
|
|
779
|
+
def get_evaluation_data(self, results_dir: Path) -> dict[str, any]:
|
|
782
780
|
"""Extract and process basic evaluation data."""
|
|
783
|
-
|
|
781
|
+
log.info("Processing evaluation data...")
|
|
784
782
|
|
|
785
783
|
# Initialize metrics
|
|
786
784
|
metrics = EvaluationMetrics()
|
|
@@ -788,7 +786,7 @@ class ReportDataProcessor:
|
|
|
788
786
|
# Extract model results
|
|
789
787
|
model_results = self.extraction_service.extract_model_results(results_dir)
|
|
790
788
|
if not model_results:
|
|
791
|
-
|
|
789
|
+
log.warning("No model results found")
|
|
792
790
|
return self._metrics_to_dict(metrics)
|
|
793
791
|
|
|
794
792
|
# Set basic model information
|
|
@@ -813,17 +811,17 @@ class ReportDataProcessor:
|
|
|
813
811
|
metrics.total_execution_time_formatted = formatted_time
|
|
814
812
|
metrics.duration = duration
|
|
815
813
|
|
|
816
|
-
|
|
814
|
+
log.info(f"Processed evaluation data for {len(metrics.models)} models")
|
|
817
815
|
return self._metrics_to_dict(metrics)
|
|
818
816
|
|
|
819
|
-
def get_detailed_evaluation_data(self, results_dir: Path) ->
|
|
817
|
+
def get_detailed_evaluation_data(self, results_dir: Path) -> dict[str, any]:
|
|
820
818
|
"""Extract and process detailed evaluation data for charts and breakdowns."""
|
|
821
|
-
|
|
819
|
+
log.info("Processing detailed evaluation data...")
|
|
822
820
|
|
|
823
821
|
# Extract model results
|
|
824
822
|
model_results = self.extraction_service.extract_model_results(results_dir)
|
|
825
823
|
if not model_results:
|
|
826
|
-
|
|
824
|
+
log.warning("No model results found for detailed data")
|
|
827
825
|
return self._empty_detailed_data()
|
|
828
826
|
|
|
829
827
|
# Calculate model performances
|
|
@@ -875,10 +873,10 @@ class ReportDataProcessor:
|
|
|
875
873
|
"model_execution_times": model_execution_times,
|
|
876
874
|
}
|
|
877
875
|
|
|
878
|
-
|
|
876
|
+
log.info("Processed detailed evaluation data successfully")
|
|
879
877
|
return detailed_data
|
|
880
878
|
|
|
881
|
-
def _extract_test_case_names(self, model_results:
|
|
879
|
+
def _extract_test_case_names(self, model_results: dict[str, any]) -> list[str]:
|
|
882
880
|
"""Extract unique test case names from model results."""
|
|
883
881
|
test_case_names = set()
|
|
884
882
|
|
|
@@ -889,9 +887,9 @@ class ReportDataProcessor:
|
|
|
889
887
|
if test_case_id:
|
|
890
888
|
test_case_names.add(test_case_id)
|
|
891
889
|
|
|
892
|
-
return sorted(
|
|
890
|
+
return sorted(test_case_names)
|
|
893
891
|
|
|
894
|
-
def _metrics_to_dict(self, metrics: EvaluationMetrics) ->
|
|
892
|
+
def _metrics_to_dict(self, metrics: EvaluationMetrics) -> dict[str, any]:
|
|
895
893
|
"""Convert EvaluationMetrics to dictionary."""
|
|
896
894
|
# Generate model tags HTML
|
|
897
895
|
model_tags = ""
|
|
@@ -928,7 +926,7 @@ class ReportDataProcessor:
|
|
|
928
926
|
"test_cases_list": test_cases_list,
|
|
929
927
|
}
|
|
930
928
|
|
|
931
|
-
def _empty_detailed_data(self) ->
|
|
929
|
+
def _empty_detailed_data(self) -> dict[str, any]:
|
|
932
930
|
"""Return empty detailed data structure."""
|
|
933
931
|
return {
|
|
934
932
|
"performance_metrics_rows": "",
|
|
@@ -957,15 +955,15 @@ def main():
|
|
|
957
955
|
|
|
958
956
|
processor = ReportDataProcessor()
|
|
959
957
|
|
|
960
|
-
|
|
958
|
+
log.info("Testing evaluation data extraction...")
|
|
961
959
|
eval_data = processor.get_evaluation_data(results_dir)
|
|
962
|
-
|
|
960
|
+
log.info(f"Found {len(eval_data.get('models', []))} models")
|
|
963
961
|
|
|
964
|
-
|
|
962
|
+
log.info("Testing detailed evaluation data extraction...")
|
|
965
963
|
detailed_data = processor.get_detailed_evaluation_data(results_dir)
|
|
966
|
-
|
|
964
|
+
log.info(f"Total evaluations: {detailed_data.get('total_evaluations', 0)}")
|
|
967
965
|
|
|
968
|
-
|
|
966
|
+
log.info("Report data processing completed successfully!")
|
|
969
967
|
|
|
970
968
|
|
|
971
969
|
if __name__ == "__main__":
|