veadk-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of veadk-python might be problematic.

Files changed (110)
  1. veadk/__init__.py +31 -0
  2. veadk/a2a/__init__.py +13 -0
  3. veadk/a2a/agent_card.py +45 -0
  4. veadk/a2a/remote_ve_agent.py +19 -0
  5. veadk/a2a/ve_a2a_server.py +77 -0
  6. veadk/a2a/ve_agent_executor.py +78 -0
  7. veadk/a2a/ve_task_store.py +37 -0
  8. veadk/agent.py +253 -0
  9. veadk/cli/__init__.py +13 -0
  10. veadk/cli/main.py +278 -0
  11. veadk/cli/services/agentpilot/__init__.py +17 -0
  12. veadk/cli/services/agentpilot/agentpilot.py +77 -0
  13. veadk/cli/services/veapig/__init__.py +17 -0
  14. veadk/cli/services/veapig/apig.py +224 -0
  15. veadk/cli/services/veapig/apig_utils.py +332 -0
  16. veadk/cli/services/vefaas/__init__.py +17 -0
  17. veadk/cli/services/vefaas/template/deploy.py +44 -0
  18. veadk/cli/services/vefaas/template/src/app.py +30 -0
  19. veadk/cli/services/vefaas/template/src/config.py +58 -0
  20. veadk/cli/services/vefaas/vefaas.py +346 -0
  21. veadk/cli/services/vefaas/vefaas_utils.py +408 -0
  22. veadk/cli/services/vetls/__init__.py +17 -0
  23. veadk/cli/services/vetls/vetls.py +87 -0
  24. veadk/cli/studio/__init__.py +13 -0
  25. veadk/cli/studio/agent_processor.py +247 -0
  26. veadk/cli/studio/fast_api.py +232 -0
  27. veadk/cli/studio/model.py +116 -0
  28. veadk/cloud/__init__.py +13 -0
  29. veadk/cloud/cloud_agent_engine.py +144 -0
  30. veadk/cloud/cloud_app.py +123 -0
  31. veadk/cloud/template/app.py +30 -0
  32. veadk/cloud/template/config.py +55 -0
  33. veadk/config.py +131 -0
  34. veadk/consts.py +17 -0
  35. veadk/database/__init__.py +17 -0
  36. veadk/database/base_database.py +45 -0
  37. veadk/database/database_factory.py +80 -0
  38. veadk/database/kv/__init__.py +13 -0
  39. veadk/database/kv/redis_database.py +109 -0
  40. veadk/database/local_database.py +43 -0
  41. veadk/database/relational/__init__.py +13 -0
  42. veadk/database/relational/mysql_database.py +114 -0
  43. veadk/database/vector/__init__.py +13 -0
  44. veadk/database/vector/opensearch_vector_database.py +205 -0
  45. veadk/database/vector/type.py +50 -0
  46. veadk/database/viking/__init__.py +13 -0
  47. veadk/database/viking/viking_database.py +378 -0
  48. veadk/database/viking/viking_memory_db.py +521 -0
  49. veadk/evaluation/__init__.py +17 -0
  50. veadk/evaluation/adk_evaluator/__init__.py +13 -0
  51. veadk/evaluation/adk_evaluator/adk_evaluator.py +291 -0
  52. veadk/evaluation/base_evaluator.py +242 -0
  53. veadk/evaluation/deepeval_evaluator/__init__.py +17 -0
  54. veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +223 -0
  55. veadk/evaluation/eval_set_file_loader.py +28 -0
  56. veadk/evaluation/eval_set_recorder.py +91 -0
  57. veadk/evaluation/utils/prometheus.py +142 -0
  58. veadk/knowledgebase/__init__.py +17 -0
  59. veadk/knowledgebase/knowledgebase.py +83 -0
  60. veadk/knowledgebase/knowledgebase_database_adapter.py +259 -0
  61. veadk/memory/__init__.py +13 -0
  62. veadk/memory/long_term_memory.py +119 -0
  63. veadk/memory/memory_database_adapter.py +235 -0
  64. veadk/memory/short_term_memory.py +124 -0
  65. veadk/memory/short_term_memory_processor.py +90 -0
  66. veadk/prompts/__init__.py +13 -0
  67. veadk/prompts/agent_default_prompt.py +30 -0
  68. veadk/prompts/prompt_evaluator.py +20 -0
  69. veadk/prompts/prompt_memory_processor.py +55 -0
  70. veadk/prompts/prompt_optimization.py +158 -0
  71. veadk/runner.py +252 -0
  72. veadk/tools/__init__.py +13 -0
  73. veadk/tools/builtin_tools/__init__.py +13 -0
  74. veadk/tools/builtin_tools/lark.py +67 -0
  75. veadk/tools/builtin_tools/las.py +23 -0
  76. veadk/tools/builtin_tools/vesearch.py +49 -0
  77. veadk/tools/builtin_tools/web_scraper.py +76 -0
  78. veadk/tools/builtin_tools/web_search.py +192 -0
  79. veadk/tools/demo_tools.py +58 -0
  80. veadk/tools/load_knowledgebase_tool.py +144 -0
  81. veadk/tools/sandbox/__init__.py +13 -0
  82. veadk/tools/sandbox/browser_sandbox.py +27 -0
  83. veadk/tools/sandbox/code_sandbox.py +30 -0
  84. veadk/tools/sandbox/computer_sandbox.py +27 -0
  85. veadk/tracing/__init__.py +13 -0
  86. veadk/tracing/base_tracer.py +172 -0
  87. veadk/tracing/telemetry/__init__.py +13 -0
  88. veadk/tracing/telemetry/exporters/__init__.py +13 -0
  89. veadk/tracing/telemetry/exporters/apiserver_exporter.py +60 -0
  90. veadk/tracing/telemetry/exporters/apmplus_exporter.py +101 -0
  91. veadk/tracing/telemetry/exporters/base_exporter.py +28 -0
  92. veadk/tracing/telemetry/exporters/cozeloop_exporter.py +69 -0
  93. veadk/tracing/telemetry/exporters/inmemory_exporter.py +88 -0
  94. veadk/tracing/telemetry/exporters/tls_exporter.py +78 -0
  95. veadk/tracing/telemetry/metrics/__init__.py +13 -0
  96. veadk/tracing/telemetry/metrics/opentelemetry_metrics.py +73 -0
  97. veadk/tracing/telemetry/opentelemetry_tracer.py +167 -0
  98. veadk/types.py +23 -0
  99. veadk/utils/__init__.py +13 -0
  100. veadk/utils/logger.py +59 -0
  101. veadk/utils/misc.py +33 -0
  102. veadk/utils/patches.py +85 -0
  103. veadk/utils/volcengine_sign.py +199 -0
  104. veadk/version.py +15 -0
  105. veadk_python-0.1.0.dist-info/METADATA +124 -0
  106. veadk_python-0.1.0.dist-info/RECORD +110 -0
  107. veadk_python-0.1.0.dist-info/WHEEL +5 -0
  108. veadk_python-0.1.0.dist-info/entry_points.txt +2 -0
  109. veadk_python-0.1.0.dist-info/licenses/LICENSE +201 -0
  110. veadk_python-0.1.0.dist-info/top_level.txt +1 -0
veadk/evaluation/adk_evaluator/adk_evaluator.py
@@ -0,0 +1,291 @@
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import os
+ import time
+ import uuid
+ from os import path
+ from typing import Any, Optional
+
+ from google.adk import Runner
+ from google.adk.artifacts import BaseArtifactService, InMemoryArtifactService
+ from google.adk.evaluation.agent_evaluator import (
+     NUM_RUNS,
+     RESPONSE_MATCH_SCORE_KEY,
+     TOOL_TRAJECTORY_SCORE_KEY,
+     AgentEvaluator,
+ )
+ from google.adk.evaluation.eval_case import IntermediateData, Invocation, SessionInput
+ from google.adk.evaluation.eval_set import EvalSet
+ from google.adk.evaluation.evaluation_generator import (
+     EvalCaseResponses,
+     EvaluationGenerator,
+ )
+ from google.adk.evaluation.evaluator import EvalStatus, EvaluationResult
+ from google.adk.sessions import BaseSessionService, InMemorySessionService
+ from typing_extensions import override
+
+ from veadk.agent import Agent
+
+ from ..base_evaluator import BaseEvaluator
+
+
+ def formatted_timestamp():
+     # YYYYMMDDHHMMSS
+     return time.strftime("%Y%m%d%H%M%S", time.localtime())
+
+
+ class VeEvaluationGenerator(EvaluationGenerator):
+     @staticmethod
+     async def _ve_process_query(  # done
+         invocations: list[Invocation],
+         agent: Agent,
+         agent_name: Optional[str] = None,
+         initial_session: Optional[SessionInput] = None,
+     ):
+         agent_to_evaluate = agent
+         if agent_name:
+             agent_to_evaluate = agent.find_agent(agent_name)
+             assert agent_to_evaluate, f"Sub-Agent `{agent_name}` not found."
+
+         return await VeEvaluationGenerator._ve_generate_inferences_from_root_agent(
+             invocations, agent_to_evaluate, None, initial_session
+         )
+
+     @staticmethod
+     async def ve_generate_responses(  # done
+         eval_set: EvalSet,
+         agent: Agent,
+         repeat_num: int = 3,
+         agent_name: Optional[str] = None,
+     ):
+         results = []
+
+         for eval_case in eval_set.eval_cases:
+             responses = []
+             for _ in range(repeat_num):
+                 response_invocations = await VeEvaluationGenerator._ve_process_query(
+                     invocations=eval_case.conversation,
+                     agent=agent,
+                     agent_name=agent_name,
+                     initial_session=eval_case.session_input,
+                 )
+                 responses.append(response_invocations)
+
+             results.append(EvalCaseResponses(eval_case=eval_case, responses=responses))
+
+         return results
+
+     @staticmethod
+     async def _ve_generate_inferences_from_root_agent(
+         invocations: list[Invocation],
+         root_agent: Agent,
+         reset_func: Any,
+         initial_session: Optional[SessionInput] = None,
+         session_id: Optional[str] = None,
+         session_service: Optional[BaseSessionService] = None,
+         artifact_service: Optional[BaseArtifactService] = None,
+     ) -> list[Invocation]:
+         """Scrapes the root agent given the list of Invocations."""
+         if not session_service:
+             session_service = InMemorySessionService()
+
+         app_name = (
+             initial_session.app_name if initial_session else "EvaluationGenerator"
+         )
+         user_id = initial_session.user_id if initial_session else "test_user_id"
+         session_id = session_id if session_id else str(uuid.uuid4())
+
+         _ = await session_service.create_session(
+             app_name=app_name,
+             user_id=user_id,
+             state=initial_session.state if initial_session else {},
+             session_id=session_id,
+         )
+
+         if not artifact_service:
+             artifact_service = InMemoryArtifactService()
+
+         if getattr(root_agent, "long_term_memory", None) is not None:
+             runner = Runner(
+                 app_name=app_name,
+                 agent=root_agent,
+                 artifact_service=artifact_service,
+                 session_service=session_service,
+                 memory_service=root_agent.long_term_memory,  # add long_term_memory
+             )
+         else:
+             runner = Runner(
+                 app_name=app_name,
+                 agent=root_agent,
+                 artifact_service=artifact_service,
+                 session_service=session_service,
+             )
+
+         # Reset agent state for each query
+         if callable(reset_func):
+             reset_func()
+
+         response_invocations = []
+
+         for invocation in invocations:
+             final_response = None
+             user_content = invocation.user_content
+             tool_uses = []
+             invocation_id = ""
+
+             async for event in runner.run_async(
+                 user_id=user_id, session_id=session_id, new_message=user_content
+             ):
+                 invocation_id = (
+                     event.invocation_id if not invocation_id else invocation_id
+                 )
+
+                 if event.is_final_response() and event.content and event.content.parts:
+                     final_response = event.content
+                 elif event.get_function_calls():
+                     for call in event.get_function_calls():
+                         tool_uses.append(call)
+
+             response_invocations.append(
+                 Invocation(
+                     invocation_id=invocation_id,
+                     user_content=user_content,
+                     final_response=final_response,
+                     intermediate_data=IntermediateData(tool_uses=tool_uses),
+                 )
+             )
+
+         return response_invocations
+
+
+ class VeAgentEvaluator(AgentEvaluator):
+     def __init__(
+         self,
+     ):
+         super().__init__()
+
+     @staticmethod
+     async def ve_evaluate_eval_set(
+         agent: Agent,
+         eval_set: EvalSet,
+         criteria: dict[str, float],
+         num_runs=NUM_RUNS,
+         agent_name=None,
+         print_detailed_results: bool = True,
+     ):
+         eval_case_responses_list = await VeEvaluationGenerator.ve_generate_responses(
+             eval_set=eval_set,
+             agent=agent,
+             repeat_num=num_runs,
+             agent_name=agent_name,
+         )
+         failures = []
+         evaluation_result_list = []
+
+         for eval_case_responses in eval_case_responses_list:
+             actual_invocations = [
+                 invocation
+                 for invocations in eval_case_responses.responses
+                 for invocation in invocations
+             ]
+             expected_invocations = eval_case_responses.eval_case.conversation * num_runs
+
+             for metric_name, threshold in criteria.items():
+                 metric_evaluator = AgentEvaluator._get_metric_evaluator(
+                     metric_name=metric_name, threshold=threshold
+                 )
+
+                 evaluation_result: EvaluationResult = (
+                     metric_evaluator.evaluate_invocations(
+                         actual_invocations=actual_invocations,
+                         expected_invocations=expected_invocations,
+                     )
+                 )
+
+                 if print_detailed_results:
+                     AgentEvaluator._print_details(
+                         evaluation_result=evaluation_result,
+                         metric_name=metric_name,
+                         threshold=threshold,
+                     )
+
+                 # Gather all the failures.
+                 if evaluation_result.overall_eval_status != EvalStatus.PASSED:
+                     failures.append(
+                         f"{metric_name} for {agent.name} Failed. Expected {threshold},"
+                         f" but got {evaluation_result.overall_score}."
+                     )
+                 evaluation_result_list.append(evaluation_result)
+
+         return evaluation_result_list, failures
+
+
+ class ADKEvaluator(BaseEvaluator):
+     def __init__(
+         self,
+         agent,
+         name: str = "veadk_adk_evaluator",
+     ):
+         super().__init__(agent=agent, name=name)
+
+     # TODO: implement
+
+     @override
+     async def eval(
+         self,
+         eval_set_file_path: str,
+         eval_id: str = f"test_{formatted_timestamp()}",
+         tool_score_threshold: float = 1.0,
+         response_match_score_threshold: float = 0.8,
+         num_runs: int = 2,
+         print_detailed_results: bool = True,
+     ):
+         test_files = []
+         eval_dataset_file_path_or_dir = eval_set_file_path
+         if isinstance(eval_dataset_file_path_or_dir, str) and os.path.isdir(
+             eval_dataset_file_path_or_dir
+         ):
+             for root, _, files in os.walk(eval_dataset_file_path_or_dir):
+                 for file in files:
+                     if file.endswith(".test.json"):
+                         test_files.append(path.join(root, file))
+         else:
+             test_files = [eval_dataset_file_path_or_dir]
+
+         initial_session = AgentEvaluator._get_initial_session()
+
+         result = []
+         failures = []
+         for test_file in test_files:
+             criteria = {
+                 TOOL_TRAJECTORY_SCORE_KEY: tool_score_threshold,  # 1-point scale; 1.0 is perfect.
+                 RESPONSE_MATCH_SCORE_KEY: response_match_score_threshold,  # Rouge-1 text match; 0.8 is default.
+             }
+             eval_set = AgentEvaluator._load_eval_set_from_file(
+                 test_file, criteria, initial_session
+             )
+
+             res, fail = await VeAgentEvaluator.ve_evaluate_eval_set(
+                 agent=self.agent,
+                 eval_set=eval_set,
+                 criteria=criteria,
+                 num_runs=num_runs,
+                 agent_name=self.agent.name,
+                 print_detailed_results=print_detailed_results,
+             )
+             result.append(res)
+             failures.extend(fail)
+
+         return result, failures
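
For orientation, a minimal usage sketch of the ADKEvaluator.eval API defined above. The Agent construction is a placeholder assumption: the real veadk.agent.Agent constructor arguments are not shown in this diff.

import asyncio

from veadk.agent import Agent
from veadk.evaluation.adk_evaluator.adk_evaluator import ADKEvaluator


async def main():
    # Placeholder: actual Agent configuration (model, tools, ...) is defined
    # in veadk/agent.py, which is not part of this excerpt.
    agent = Agent()

    evaluator = ADKEvaluator(agent=agent)
    # eval() accepts a single eval-set file, or a directory that is walked
    # recursively for files ending in ".test.json".
    results, failures = await evaluator.eval(
        eval_set_file_path="evalsets/",
        tool_score_threshold=1.0,
        response_match_score_threshold=0.8,
        num_runs=2,
    )
    for failure in failures:
        print(failure)


asyncio.run(main())
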
veadk/evaluation/base_evaluator.py
@@ -0,0 +1,242 @@
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ import time
+ import uuid
+ from abc import abstractmethod
+ from typing import Any
+
+ from google.adk import Runner
+ from google.adk.evaluation.eval_set import EvalSet
+ from google.adk.sessions import InMemorySessionService
+ from google.genai import types
+ from pydantic import BaseModel
+
+
+ class InvocationTestData(BaseModel):
+     invocation_id: str = ""
+     input: str
+     actual_output: str
+     expected_output: str
+     actual_tool: list[dict] = []
+     expected_tool: list[dict] = []
+     latency: str = ""  # ms
+
+
+ class EvalCaseData(BaseModel):
+     invocations: list[InvocationTestData]
+
+
+ class MetricResult(BaseModel):
+     metric_type: str
+     success: bool
+     score: float
+     reason: str
+
+
+ class EvalResultData(BaseModel):
+     metric_results: list[MetricResult]
+     average_score: float = 0.0
+     total_reason: str = ""
+
+     def calculate_average_score(self):
+         total_score = sum(result.score for result in self.metric_results)
+         self.average_score = (
+             total_score / len(self.metric_results) if self.metric_results else 0.0
+         )
+
+     def generate_total_reason(self):
+         self.total_reason = "\n".join(
+             f"{result.metric_type}: {result.reason}" for result in self.metric_results
+         )
+
+     def call_before_append(self):
+         self.calculate_average_score()
+         self.generate_total_reason()
+
+
+ class BaseEvaluator:
+     def __init__(
+         self,
+         agent,
+         name: str,
+     ):
+         self.name = name
+         self.agent = agent
+         self.invocation_list: list[EvalCaseData] = []
+         self.result_list: list[EvalResultData] = []
+         self.agent_information_list: list[dict] = []
+
+     def load_eval_set(self, eval_set_file: str) -> EvalSet:
+         from .eval_set_file_loader import load_eval_set_from_file
+
+         return load_eval_set_from_file(eval_set_file)
+
+     def generate_eval_data(self, eval_set_file_path: str):
+         eval_case_data_list: list[EvalCaseData] = []
+
+         eval_cases = self.load_eval_set(eval_set_file_path).eval_cases
+         for eval_case in eval_cases:
+             eval_case_data = EvalCaseData(invocations=[])
+             self.agent_information_list.append(
+                 {
+                     "app_name": eval_case.session_input.app_name,
+                     "user_id": eval_case.session_input.user_id,
+                     "session_id": str(
+                         uuid.uuid4()
+                     ),  # random session id for evaluation
+                 }
+             )
+
+             for invocation in eval_case.conversation:
+                 _input: str = ""
+                 _expected_output: str = ""
+                 _expected_tool: list[dict] = []
+
+                 user_content = invocation.user_content
+                 _input = user_content.parts[0].text
+                 _expected_output = invocation.final_response.parts[0].text
+
+                 if invocation.intermediate_data.tool_uses:
+                     for expected_tool_use in invocation.intermediate_data.tool_uses:
+                         _expected_tool.append(
+                             {
+                                 "name": expected_tool_use.name,
+                                 "args": expected_tool_use.args,
+                             }
+                         )
+
+                 eval_case_data.invocations.append(
+                     InvocationTestData(
+                         invocation_id=invocation.invocation_id,
+                         input=_input,
+                         actual_output="",
+                         actual_tool=[],
+                         expected_output=_expected_output,
+                         expected_tool=_expected_tool,
+                         latency="",
+                     )
+                 )
+
+             eval_case_data_list.append(eval_case_data)
+         self.invocation_list = eval_case_data_list
+
+     async def _run_agent_for_actual_data(self):
+         for eval_case_data, agent_information in zip(
+             self.invocation_list, self.agent_information_list
+         ):
+             session_service = InMemorySessionService()
+             _ = await session_service.create_session(
+                 app_name=agent_information["app_name"],
+                 user_id=agent_information["user_id"],
+                 state={},
+                 session_id=agent_information["session_id"],
+             )
+
+             if getattr(self.agent, "long_term_memory", None):
+                 runner = Runner(
+                     app_name=agent_information["app_name"],
+                     agent=self.agent,
+                     session_service=session_service,
+                     memory_service=self.agent.long_term_memory,
+                 )
+             else:
+                 runner = Runner(
+                     app_name=agent_information["app_name"],
+                     agent=self.agent,
+                     session_service=session_service,
+                 )
+
+             for invocation in eval_case_data.invocations:
+                 _actual_output: str = ""
+                 _actual_tool: list[dict] = []
+                 _latency: str = ""
+                 final_response = None
+                 tool_uses = []
+                 invocation_id = ""
+
+                 user_content = types.Content(
+                     role="user", parts=[types.Part(text=invocation.input)]
+                 )
+                 tik = time.time()
+                 async for event in runner.run_async(
+                     user_id=agent_information["user_id"],
+                     session_id=agent_information["session_id"],
+                     new_message=user_content,
+                 ):
+                     invocation_id = (
+                         event.invocation_id if not invocation_id else invocation_id
+                     )
+                     if (
+                         event.is_final_response()
+                         and event.content
+                         and event.content.parts
+                     ):
+                         final_response = event.content
+                     elif event.get_function_calls():
+                         for call in event.get_function_calls():
+                             tool_uses.append(call)
+                 tok = time.time()
+                 _latency = str((tok - tik) * 1000)
+
+                 if final_response and final_response.parts:
+                     _actual_output = final_response.parts[0].text
+                 for tool_use in tool_uses:
+                     _actual_tool.append(
+                         {
+                             "name": tool_use.name,
+                             "args": tool_use.args,
+                         }
+                     )
+
+                 invocation.actual_output = _actual_output
+                 invocation.actual_tool = _actual_tool
+                 invocation.latency = _latency
+
+     def get_data(self) -> list[list[dict[str, Any]]]:
+         """Merge the evaluation data and return it in the format of list[list[dict]]"""
+         result = []
+         for i, eval_case in enumerate(self.invocation_list):
+             case_data = []
+             # Get corresponding eval_result or use default if not available
+             eval_result = (
+                 self.result_list[i]
+                 if i < len(self.result_list)
+                 else EvalResultData(metric_results=[])
+             )
+             for invocation in eval_case.invocations:
+                 data = {
+                     "input": invocation.input,
+                     "expected_output": invocation.expected_output,
+                     "actual_output": invocation.actual_output,
+                     "expected_tool": invocation.expected_tool,
+                     "actual_tool": invocation.actual_tool,
+                     "score": eval_result.average_score,
+                     "reason": eval_result.total_reason,
+                     "latency": invocation.latency,
+                 }
+                 case_data.append(data)
+             result.append(case_data)
+         return result
+
+     @abstractmethod
+     async def eval(
+         self,
+         eval_set_file_path: str,
+         metrics: list[Any],
+         eval_id: str,
+     ):
+         """An abstract method for evaluation based on metrics."""
+         pass
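
BaseEvaluator leaves scoring to subclasses; judging by the methods above, the intended flow is generate_eval_data, then _run_agent_for_actual_data, then scoring, then get_data. A hypothetical subclass sketch (StubEvaluator and its fixed score are illustrative only, not part of the package):

from typing import Any

from veadk.evaluation.base_evaluator import (
    BaseEvaluator,
    EvalResultData,
    MetricResult,
)


class StubEvaluator(BaseEvaluator):
    """Hypothetical subclass; illustrates the call order only."""

    async def eval(self, eval_set_file_path: str, metrics: list[Any], eval_id: str):
        # 1. Parse the eval set into InvocationTestData records (expected side).
        self.generate_eval_data(eval_set_file_path)
        # 2. Run the agent to fill in actual_output, actual_tool and latency.
        await self._run_agent_for_actual_data()
        # 3. Score each case; a real evaluator would derive these from `metrics`.
        for _ in self.invocation_list:
            result = EvalResultData(
                metric_results=[
                    MetricResult(
                        metric_type="stub", success=True, score=1.0, reason="n/a"
                    )
                ]
            )
            result.call_before_append()  # fills average_score and total_reason
            self.result_list.append(result)
        # 4. Merge expected/actual data and scores into row dicts per eval case.
        return self.get_data()
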
veadk/evaluation/deepeval_evaluator/__init__.py
@@ -0,0 +1,17 @@
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from .deepeval_evaluator import DeepevalEvaluator
+
+ __all__ = ["DeepevalEvaluator"]
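
With this re-export, the evaluator can be imported from the subpackage directly:

from veadk.evaluation.deepeval_evaluator import DeepevalEvaluator
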