veadk-python 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- veadk/a2a/remote_ve_agent.py +56 -1
- veadk/agent.py +79 -26
- veadk/agents/loop_agent.py +22 -9
- veadk/agents/parallel_agent.py +21 -9
- veadk/agents/sequential_agent.py +18 -9
- veadk/auth/veauth/apmplus_veauth.py +32 -39
- veadk/auth/veauth/ark_veauth.py +3 -1
- veadk/auth/veauth/utils.py +12 -0
- veadk/auth/veauth/viking_mem0_veauth.py +91 -0
- veadk/cli/cli.py +5 -1
- veadk/cli/cli_create.py +62 -1
- veadk/cli/cli_deploy.py +36 -1
- veadk/cli/cli_eval.py +55 -0
- veadk/cli/cli_init.py +44 -3
- veadk/cli/cli_kb.py +36 -1
- veadk/cli/cli_pipeline.py +66 -1
- veadk/cli/cli_prompt.py +16 -1
- veadk/cli/cli_uploadevalset.py +15 -1
- veadk/cli/cli_web.py +35 -4
- veadk/cloud/cloud_agent_engine.py +142 -25
- veadk/cloud/cloud_app.py +219 -12
- veadk/configs/database_configs.py +4 -0
- veadk/configs/model_configs.py +5 -1
- veadk/configs/tracing_configs.py +2 -2
- veadk/evaluation/adk_evaluator/adk_evaluator.py +77 -17
- veadk/evaluation/base_evaluator.py +219 -3
- veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +116 -1
- veadk/evaluation/eval_set_file_loader.py +20 -0
- veadk/evaluation/eval_set_recorder.py +54 -0
- veadk/evaluation/types.py +32 -0
- veadk/evaluation/utils/prometheus.py +61 -0
- veadk/knowledgebase/backends/base_backend.py +14 -1
- veadk/knowledgebase/backends/in_memory_backend.py +10 -1
- veadk/knowledgebase/backends/opensearch_backend.py +26 -0
- veadk/knowledgebase/backends/redis_backend.py +29 -2
- veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +43 -5
- veadk/knowledgebase/knowledgebase.py +173 -12
- veadk/memory/long_term_memory.py +148 -4
- veadk/memory/long_term_memory_backends/mem0_backend.py +11 -0
- veadk/memory/short_term_memory.py +119 -5
- veadk/runner.py +412 -1
- veadk/tools/builtin_tools/llm_shield.py +381 -0
- veadk/tools/builtin_tools/mcp_router.py +9 -2
- veadk/tools/builtin_tools/run_code.py +25 -5
- veadk/tools/builtin_tools/web_search.py +38 -154
- veadk/tracing/base_tracer.py +28 -1
- veadk/tracing/telemetry/attributes/extractors/common_attributes_extractors.py +105 -1
- veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +260 -0
- veadk/tracing/telemetry/attributes/extractors/tool_attributes_extractors.py +69 -0
- veadk/tracing/telemetry/attributes/extractors/types.py +78 -0
- veadk/tracing/telemetry/exporters/apmplus_exporter.py +157 -0
- veadk/tracing/telemetry/exporters/base_exporter.py +8 -0
- veadk/tracing/telemetry/exporters/cozeloop_exporter.py +60 -1
- veadk/tracing/telemetry/exporters/inmemory_exporter.py +118 -1
- veadk/tracing/telemetry/exporters/tls_exporter.py +66 -0
- veadk/tracing/telemetry/opentelemetry_tracer.py +111 -1
- veadk/tracing/telemetry/telemetry.py +118 -2
- veadk/version.py +1 -1
- {veadk_python-0.2.16.dist-info → veadk_python-0.2.17.dist-info}/METADATA +1 -1
- {veadk_python-0.2.16.dist-info → veadk_python-0.2.17.dist-info}/RECORD +64 -62
- {veadk_python-0.2.16.dist-info → veadk_python-0.2.17.dist-info}/WHEEL +0 -0
- {veadk_python-0.2.16.dist-info → veadk_python-0.2.17.dist-info}/entry_points.txt +0 -0
- {veadk_python-0.2.16.dist-info → veadk_python-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {veadk_python-0.2.16.dist-info → veadk_python-0.2.17.dist-info}/top_level.txt +0 -0
veadk/cloud/cloud_app.py
CHANGED
|
@@ -23,17 +23,37 @@ from a2a.types import AgentCard, Message, MessageSendParams, SendMessageRequest
|
|
|
23
23
|
|
|
24
24
|
from veadk.config import getenv
|
|
25
25
|
from veadk.utils.logger import get_logger
|
|
26
|
+
from veadk.integrations.ve_faas.ve_faas import VeFaaS
|
|
26
27
|
|
|
27
28
|
logger = get_logger(__name__)
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class CloudApp:
|
|
31
|
-
"""
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
32
|
+
"""Represents a deployed cloud agent application on Volcengine FaaS platform.
|
|
33
|
+
|
|
34
|
+
This class facilitates interaction with the deployed agent via A2A protocol,
|
|
35
|
+
supports self-management like update and delete, and handles endpoint resolution.
|
|
36
|
+
|
|
37
|
+
It uses HTTP client for async communications.
|
|
38
|
+
|
|
39
|
+
Attributes:
|
|
40
|
+
vefaas_application_name (str): Name of the VeFaaS application. Defaults to "".
|
|
41
|
+
vefaas_endpoint (str): URL for accessing the application. Resolved if not provided.
|
|
42
|
+
vefaas_application_id (str): Unique identifier of the application. Defaults to "".
|
|
43
|
+
use_agent_card (bool): Flag to resolve endpoint via agent card. Defaults to False.
|
|
44
|
+
httpx_client (httpx.AsyncClient): Async HTTP client for requests.
|
|
45
|
+
|
|
46
|
+
Note:
|
|
47
|
+
At least one of name, endpoint, or ID must be provided during init.
|
|
48
|
+
Agent card mode fetches card from the endpoint's public path.
|
|
49
|
+
|
|
50
|
+
Examples:
|
|
51
|
+
```python
|
|
52
|
+
from veadk.cloud.cloud_app import CloudApp
|
|
53
|
+
app = CloudApp(vefaas_endpoint="https://my-agent.volcengine.com")
|
|
54
|
+
response = await app.message_send("Query", "session-1", "user-123")
|
|
55
|
+
print(response.message_id)
|
|
56
|
+
```
|
|
37
57
|
"""
|
|
38
58
|
|
|
39
59
|
def __init__(
|
|
@@ -43,6 +63,31 @@ class CloudApp:
|
|
|
43
63
|
vefaas_application_id: str = "",
|
|
44
64
|
use_agent_card: bool = False,
|
|
45
65
|
):
|
|
66
|
+
"""Initializes the CloudApp with VeFaaS application details.
|
|
67
|
+
|
|
68
|
+
Sets attributes, validates inputs, resolves endpoint if missing, and creates HTTP client.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
vefaas_application_name (str, optional): Application name for lookup. Defaults to "".
|
|
72
|
+
vefaas_endpoint (str, optional): Direct endpoint URL. Defaults to "".
|
|
73
|
+
vefaas_application_id (str, optional): Application ID for lookup. Defaults to "".
|
|
74
|
+
use_agent_card (bool): Use agent card to determine invocation URL. Defaults to False.
|
|
75
|
+
|
|
76
|
+
Returns:
|
|
77
|
+
None
|
|
78
|
+
|
|
79
|
+
Raises:
|
|
80
|
+
ValueError: If no app identifiers provided or endpoint lacks http/https prefix.
|
|
81
|
+
|
|
82
|
+
Note:
|
|
83
|
+
Logs info if agent card mode enabled.
|
|
84
|
+
Endpoint is fetched via _get_vefaas_endpoint if not set.
|
|
85
|
+
|
|
86
|
+
Examples:
|
|
87
|
+
```python
|
|
88
|
+
app = CloudApp(vefaas_application_id="app-123", use_agent_card=True)
|
|
89
|
+
```
|
|
90
|
+
"""
|
|
46
91
|
self.vefaas_endpoint = vefaas_endpoint
|
|
47
92
|
self.vefaas_application_id = vefaas_application_id
|
|
48
93
|
self.vefaas_application_name = vefaas_application_name
|
|
@@ -82,6 +127,29 @@ class CloudApp:
|
|
|
82
127
|
volcengine_ak: str = getenv("VOLCENGINE_ACCESS_KEY"),
|
|
83
128
|
volcengine_sk: str = getenv("VOLCENGINE_SECRET_KEY"),
|
|
84
129
|
) -> str:
|
|
130
|
+
"""Fetches the application endpoint from VeFaaS details if not directly provided.
|
|
131
|
+
|
|
132
|
+
Uses VeFaaS client to get app info and parse CloudResource JSON for URL.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
volcengine_ak (str, optional): Volcengine access key. Defaults to env var.
|
|
136
|
+
volcengine_sk (str, optional): Volcengine secret key. Defaults to env var.
|
|
137
|
+
|
|
138
|
+
Returns:
|
|
139
|
+
str: The system URL from CloudResource or empty string on failure.
|
|
140
|
+
|
|
141
|
+
Raises:
|
|
142
|
+
ValueError: If application not found by ID or name.
|
|
143
|
+
|
|
144
|
+
Note:
|
|
145
|
+
Logs warning if JSON parsing fails; returns empty on error.
|
|
146
|
+
Called during init if endpoint missing.
|
|
147
|
+
|
|
148
|
+
Examples:
|
|
149
|
+
```python
|
|
150
|
+
endpoint = app._get_vefaas_endpoint("custom-ak", "custom-sk")
|
|
151
|
+
```
|
|
152
|
+
"""
|
|
85
153
|
from veadk.integrations.ve_faas.ve_faas import VeFaaS
|
|
86
154
|
|
|
87
155
|
vefaas_client = VeFaaS(access_key=volcengine_ak, secret_key=volcengine_sk)
|
|
@@ -105,6 +173,26 @@ class CloudApp:
|
|
|
105
173
|
return vefaas_endpoint
|
|
106
174
|
|
|
107
175
|
def _get_vefaas_application_id_by_name(self) -> str:
|
|
176
|
+
"""Retrieves the application ID using the configured name.
|
|
177
|
+
|
|
178
|
+
Instantiates VeFaaS client and queries by name.
|
|
179
|
+
|
|
180
|
+
Returns:
|
|
181
|
+
str: The found application ID.
|
|
182
|
+
|
|
183
|
+
Raises:
|
|
184
|
+
ValueError: If vefaas_application_name is not set.
|
|
185
|
+
|
|
186
|
+
Note:
|
|
187
|
+
Uses default environment credentials.
|
|
188
|
+
Internal method for ID resolution.
|
|
189
|
+
|
|
190
|
+
Examples:
|
|
191
|
+
```python
|
|
192
|
+
app.vefaas_application_name = "my-app"
|
|
193
|
+
id = app._get_vefaas_application_id_by_name()
|
|
194
|
+
```
|
|
195
|
+
"""
|
|
108
196
|
if not self.vefaas_application_name:
|
|
109
197
|
raise ValueError(
|
|
110
198
|
"VeFaaS CloudAPP must be set application_name to get application_id."
|
|
@@ -121,6 +209,20 @@ class CloudApp:
|
|
|
121
209
|
return vefaas_application_id
|
|
122
210
|
|
|
123
211
|
async def _get_a2a_client(self) -> A2AClient:
|
|
212
|
+
"""Constructs an A2A client configured for this cloud app.
|
|
213
|
+
|
|
214
|
+
If use_agent_card, resolves agent card and uses its URL; otherwise uses direct endpoint.
|
|
215
|
+
|
|
216
|
+
Args:
|
|
217
|
+
self: The CloudApp instance.
|
|
218
|
+
|
|
219
|
+
Returns:
|
|
220
|
+
A2AClient: Ready-to-use A2A client.
|
|
221
|
+
|
|
222
|
+
Note:
|
|
223
|
+
Manages httpx_client context.
|
|
224
|
+
For card mode, fetches from base_url/ (public card).
|
|
225
|
+
"""
|
|
124
226
|
if self.use_agent_card:
|
|
125
227
|
async with self.httpx_client as httpx_client:
|
|
126
228
|
resolver = A2ACardResolver(
|
|
@@ -141,19 +243,81 @@ class CloudApp:
|
|
|
141
243
|
|
|
142
244
|
def update_self(
|
|
143
245
|
self,
|
|
246
|
+
path: str,
|
|
144
247
|
volcengine_ak: str = getenv("VOLCENGINE_ACCESS_KEY"),
|
|
145
248
|
volcengine_sk: str = getenv("VOLCENGINE_SECRET_KEY"),
|
|
146
249
|
):
|
|
250
|
+
"""Updates the configuration of this cloud application.
|
|
251
|
+
|
|
252
|
+
Updates the application's function code from `path` via the VeFaaS client, then refreshes the stored endpoint and application ID.
|
|
253
|
+
|
|
254
|
+
Args:
|
|
255
|
+
volcengine_ak (str, optional): Access key for VeFaaS. Defaults to env var.
|
|
256
|
+
volcengine_sk (str, optional): Secret key for VeFaaS. Defaults to env var.
|
|
257
|
+
|
|
258
|
+
Returns:
|
|
259
|
+
None
|
|
260
|
+
|
|
261
|
+
Raises:
|
|
262
|
+
ValueError: If access key or secret key is missing, or if the update fails.
|
|
263
|
+
|
|
264
|
+
Examples:
|
|
265
|
+
```python
|
|
266
|
+
app.update_self(path="./my_agent", volcengine_ak="ak", volcengine_sk="sk")
|
|
267
|
+
```
|
|
268
|
+
"""
|
|
147
269
|
if not volcengine_ak or not volcengine_sk:
|
|
148
270
|
raise ValueError("Volcengine access key and secret key must be set.")
|
|
149
271
|
|
|
150
|
-
|
|
272
|
+
if not self.vefaas_application_id:
|
|
273
|
+
self.vefaas_application_id = self._get_vefaas_application_id_by_name()
|
|
274
|
+
|
|
275
|
+
vefaas_client = VeFaaS(access_key=volcengine_ak, secret_key=volcengine_sk)
|
|
276
|
+
|
|
277
|
+
try:
|
|
278
|
+
vefaas_application_url, app_id, function_id = (
|
|
279
|
+
vefaas_client._update_function_code(
|
|
280
|
+
application_name=self.vefaas_application_name,
|
|
281
|
+
path=path,
|
|
282
|
+
)
|
|
283
|
+
)
|
|
284
|
+
self.vefaas_endpoint = vefaas_application_url
|
|
285
|
+
self.vefaas_application_id = app_id
|
|
286
|
+
logger.info(
|
|
287
|
+
f"Cloud app {self.vefaas_application_name} updated successfully."
|
|
288
|
+
)
|
|
289
|
+
except Exception as e:
|
|
290
|
+
raise ValueError(f"Failed to update cloud app. Error: {e}")
|
|
151
291
|
|
|
152
292
|
def delete_self(
|
|
153
293
|
self,
|
|
154
294
|
volcengine_ak: str = getenv("VOLCENGINE_ACCESS_KEY"),
|
|
155
295
|
volcengine_sk: str = getenv("VOLCENGINE_SECRET_KEY"),
|
|
156
296
|
):
|
|
297
|
+
"""Deletes this cloud application after interactive confirmation.
|
|
298
|
+
|
|
299
|
+
Issues delete to VeFaaS and polls for completion.
|
|
300
|
+
|
|
301
|
+
Args:
|
|
302
|
+
volcengine_ak (str, optional): Access key. Defaults to env var.
|
|
303
|
+
volcengine_sk (str, optional): Secret key. Defaults to env var.
|
|
304
|
+
|
|
305
|
+
Returns:
|
|
306
|
+
None
|
|
307
|
+
|
|
308
|
+
Raises:
|
|
309
|
+
ValueError: If credentials not provided.
|
|
310
|
+
|
|
311
|
+
Note:
|
|
312
|
+
Fetches ID if not set using name.
|
|
313
|
+
Polls every 3 seconds until app no longer exists.
|
|
314
|
+
Prints status messages.
|
|
315
|
+
|
|
316
|
+
Examples:
|
|
317
|
+
```python
|
|
318
|
+
app.delete_self()
|
|
319
|
+
```
|
|
320
|
+
"""
|
|
157
321
|
if not volcengine_ak or not volcengine_sk:
|
|
158
322
|
raise ValueError("Volcengine access key and secret key must be set.")
|
|
159
323
|
|
|
@@ -187,8 +351,33 @@ class CloudApp:
|
|
|
187
351
|
async def message_send(
|
|
188
352
|
self, message: str, session_id: str, user_id: str, timeout: float = 600.0
|
|
189
353
|
) -> Message | None:
|
|
190
|
-
"""
|
|
191
|
-
|
|
354
|
+
"""Sends a user message to the cloud agent and retrieves the response.
|
|
355
|
+
|
|
356
|
+
Constructs A2A SendMessageRequest and executes via client.
|
|
357
|
+
|
|
358
|
+
Args:
|
|
359
|
+
message (str): Text content of the user message.
|
|
360
|
+
session_id (str): Identifier for the conversation session.
|
|
361
|
+
user_id (str): Identifier for the user.
|
|
362
|
+
timeout (float): Maximum wait time in seconds. Defaults to 600.0.
|
|
363
|
+
|
|
364
|
+
Returns:
|
|
365
|
+
Message | None: Assistant response message or None if error occurs.
|
|
366
|
+
|
|
367
|
+
Raises:
|
|
368
|
+
Exception: Communication or processing errors caught while sending are logged (not printed), and None is returned instead.
|
|
369
|
+
|
|
370
|
+
Note:
|
|
371
|
+
Uses UUID for message and request IDs.
|
|
372
|
+
Payload includes role 'user' and text part.
|
|
373
|
+
Debug logs the full response.
|
|
374
|
+
Ignores type checks for result as it may not be Task.
|
|
375
|
+
|
|
376
|
+
Examples:
|
|
377
|
+
```python
|
|
378
|
+
response = await app.message_send("What is AI?", "chat-1", "user-1", timeout=300)
|
|
379
|
+
print(response.content)
|
|
380
|
+
```
|
|
192
381
|
"""
|
|
193
382
|
a2a_client = await self._get_a2a_client()
|
|
194
383
|
|
|
@@ -223,13 +412,31 @@ class CloudApp:
|
|
|
223
412
|
# from CloudApp will not be `Task` type
|
|
224
413
|
return res.root.result # type: ignore
|
|
225
414
|
except Exception as e:
|
|
226
|
-
|
|
227
|
-
print(e)
|
|
415
|
+
logger.error(f"Failed to send message to cloud app. Error: {e}")
|
|
228
416
|
return None
|
|
229
417
|
|
|
230
418
|
|
|
231
419
|
def get_message_id(message: Message):
|
|
232
|
-
"""
|
|
420
|
+
"""Extracts the unique ID from an A2A Message object.
|
|
421
|
+
|
|
422
|
+
Checks for both legacy 'messageId' and current 'message_id' attributes.
|
|
423
|
+
|
|
424
|
+
Args:
|
|
425
|
+
message (Message): The A2A message instance.
|
|
426
|
+
|
|
427
|
+
Returns:
|
|
428
|
+
str: The message identifier.
|
|
429
|
+
|
|
430
|
+
Note:
|
|
431
|
+
Ensures compatibility with a2a-python versions before and after 0.3.0.
|
|
432
|
+
Checks the legacy 'messageId' attribute first; otherwise uses 'message_id'.
|
|
433
|
+
|
|
434
|
+
Examples:
|
|
435
|
+
```python
|
|
436
|
+
mid = get_message_id(response_message)
|
|
437
|
+
print(mid)
|
|
438
|
+
```
|
|
439
|
+
"""
|
|
233
440
|
if getattr(message, "messageId", None):
|
|
234
441
|
# Compatible with the messageId of the old a2a-python version (<0.3.0) in cloud app
|
|
235
442
|
return message.messageId # type: ignore
|
veadk/configs/model_configs.py
CHANGED
|
@@ -56,7 +56,11 @@ class EmbeddingModelConfig(BaseSettings):
|
|
|
56
56
|
|
|
57
57
|
@cached_property
|
|
58
58
|
def api_key(self) -> str:
|
|
59
|
-
return
|
|
59
|
+
return (
|
|
60
|
+
os.getenv("MODEL_EMBEDDING_API_KEY")
|
|
61
|
+
or os.getenv("MODEL_AGENT_API_KEY") # try to use agent's model api key
|
|
62
|
+
or get_ark_token()
|
|
63
|
+
)
|
|
60
64
|
|
|
61
65
|
|
|
62
66
|
class NormalEmbeddingModelConfig(BaseSettings):
|
veadk/configs/tracing_configs.py
CHANGED
|
@@ -18,7 +18,7 @@ from functools import cached_property
|
|
|
18
18
|
from pydantic import Field
|
|
19
19
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
20
20
|
|
|
21
|
-
from veadk.auth.veauth.apmplus_veauth import
|
|
21
|
+
from veadk.auth.veauth.apmplus_veauth import get_apmplus_token
|
|
22
22
|
from veadk.consts import (
|
|
23
23
|
DEFAULT_APMPLUS_OTEL_EXPORTER_ENDPOINT,
|
|
24
24
|
DEFAULT_APMPLUS_OTEL_EXPORTER_SERVICE_NAME,
|
|
@@ -46,7 +46,7 @@ class APMPlusConfig(BaseSettings):
|
|
|
46
46
|
def otel_exporter_api_key(self) -> str:
|
|
47
47
|
return (
|
|
48
48
|
os.getenv("OBSERVABILITY_OPENTELEMETRY_APMPLUS_API_KEY")
|
|
49
|
-
or
|
|
49
|
+
or get_apmplus_token()
|
|
50
50
|
)
|
|
51
51
|
|
|
52
52
|
|
|
@@ -39,16 +39,55 @@ import inspect
|
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
def formatted_timestamp():
|
|
42
|
+
"""Generates a formatted timestamp string in YYYYMMDDHHMMSS format.
|
|
43
|
+
|
|
44
|
+
This function creates a string representation of the current time.
|
|
45
|
+
It uses local time for formatting.
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
str: Timestamp string like '20251028123045'.
|
|
49
|
+
"""
|
|
42
50
|
# YYYYMMDDHHMMSS
|
|
43
51
|
return time.strftime("%Y%m%d%H%M%S", time.localtime())
|
|
44
52
|
|
|
45
53
|
|
|
46
54
|
class ADKEvaluator(BaseEvaluator):
|
|
55
|
+
"""Evaluates agents using Google ADK metrics.
|
|
56
|
+
|
|
57
|
+
This class uses Google's Agent Development Kit (ADK) to test agents.
|
|
58
|
+
It checks tool usage and response quality.
|
|
59
|
+
Runs tests multiple times for reliable results.
|
|
60
|
+
|
|
61
|
+
Attributes:
|
|
62
|
+
name (str): Name of this evaluator. Defaults to 'veadk_adk_evaluator'.
|
|
63
|
+
|
|
64
|
+
Note:
|
|
65
|
+
Works with .test.json files and folders of files.
|
|
66
|
+
Default thresholds: tool=1.0, response=0.8.
|
|
67
|
+
Runs each test multiple times (default 2) for average scores.
|
|
68
|
+
|
|
69
|
+
Examples:
|
|
70
|
+
```python
|
|
71
|
+
agent = Agent(tools=[get_city_weather])
|
|
72
|
+
evaluator = ADKEvaluator(agent=agent)
|
|
73
|
+
results, failures = await evaluator.evaluate(eval_set_file_path="test_folder")
|
|
74
|
+
```
|
|
75
|
+
"""
|
|
76
|
+
|
|
47
77
|
def __init__(
|
|
48
78
|
self,
|
|
49
79
|
agent,
|
|
50
80
|
name: str = "veadk_adk_evaluator",
|
|
51
81
|
):
|
|
82
|
+
"""Initializes the ADK evaluator with agent and name.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
agent: The agent to evaluate.
|
|
86
|
+
name (str): Name of the evaluator. Defaults to 'veadk_adk_evaluator'.
|
|
87
|
+
|
|
88
|
+
Raises:
|
|
89
|
+
ValueError: If agent is invalid.
|
|
90
|
+
"""
|
|
52
91
|
super().__init__(agent=agent, name=name)
|
|
53
92
|
|
|
54
93
|
@override
|
|
@@ -62,23 +101,44 @@ class ADKEvaluator(BaseEvaluator):
|
|
|
62
101
|
num_runs: int = 2,
|
|
63
102
|
print_detailed_results: bool = True,
|
|
64
103
|
):
|
|
65
|
-
"""
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
104
|
+
"""Tests agent using ADK metrics on test cases.
|
|
105
|
+
|
|
106
|
+
This method does these steps:
|
|
107
|
+
1. Finds test files in folder or single file
|
|
108
|
+
2. Sets up scoring rules with thresholds
|
|
109
|
+
3. Runs agent multiple times for each test
|
|
110
|
+
4. Converts data to ADK format
|
|
111
|
+
5. Scores tool usage and response quality
|
|
112
|
+
6. Collects results and failures
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
eval_set: Test cases in memory. If given, used first.
|
|
116
|
+
eval_set_file_path: Path to test file or folder. Used if no eval_set.
|
|
117
|
+
eval_id: Unique name for this test run.
|
|
118
|
+
tool_score_threshold: Minimum score for tool usage. 1.0 means perfect.
|
|
119
|
+
response_match_score_threshold: Minimum score for response match.
|
|
120
|
+
Uses text similarity. 0.8 is default.
|
|
121
|
+
num_runs: How many times to run each test. More runs = more reliable.
|
|
122
|
+
print_detailed_results: If True, shows detailed scores for each test.
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
tuple[list, list]: Two lists:
|
|
126
|
+
- List of evaluation results with scores
|
|
127
|
+
- List of failure messages if tests failed
|
|
128
|
+
|
|
129
|
+
Raises:
|
|
130
|
+
ValueError: If no test cases found or thresholds wrong.
|
|
131
|
+
FileNotFoundError: If test file not found.
|
|
132
|
+
EvaluationError: If agent fails or scoring fails.
|
|
133
|
+
|
|
134
|
+
Examples:
|
|
135
|
+
```python
|
|
136
|
+
results, failures = await evaluator.evaluate(
|
|
137
|
+
eval_set_file_path="tests/",
|
|
138
|
+
tool_score_threshold=0.9,
|
|
139
|
+
num_runs=3)
|
|
140
|
+
print(f"Results: {len(results)}, Failures: {len(failures)}")
|
|
141
|
+
```
|
|
82
142
|
"""
|
|
83
143
|
|
|
84
144
|
# Resolve eval files: accept a directory (scan *.test.json) or a single file
|