veadk-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of veadk-python might be problematic. Click here for more details.
- veadk/__init__.py +31 -0
- veadk/a2a/__init__.py +13 -0
- veadk/a2a/agent_card.py +45 -0
- veadk/a2a/remote_ve_agent.py +19 -0
- veadk/a2a/ve_a2a_server.py +77 -0
- veadk/a2a/ve_agent_executor.py +78 -0
- veadk/a2a/ve_task_store.py +37 -0
- veadk/agent.py +253 -0
- veadk/cli/__init__.py +13 -0
- veadk/cli/main.py +278 -0
- veadk/cli/services/agentpilot/__init__.py +17 -0
- veadk/cli/services/agentpilot/agentpilot.py +77 -0
- veadk/cli/services/veapig/__init__.py +17 -0
- veadk/cli/services/veapig/apig.py +224 -0
- veadk/cli/services/veapig/apig_utils.py +332 -0
- veadk/cli/services/vefaas/__init__.py +17 -0
- veadk/cli/services/vefaas/template/deploy.py +44 -0
- veadk/cli/services/vefaas/template/src/app.py +30 -0
- veadk/cli/services/vefaas/template/src/config.py +58 -0
- veadk/cli/services/vefaas/vefaas.py +346 -0
- veadk/cli/services/vefaas/vefaas_utils.py +408 -0
- veadk/cli/services/vetls/__init__.py +17 -0
- veadk/cli/services/vetls/vetls.py +87 -0
- veadk/cli/studio/__init__.py +13 -0
- veadk/cli/studio/agent_processor.py +247 -0
- veadk/cli/studio/fast_api.py +232 -0
- veadk/cli/studio/model.py +116 -0
- veadk/cloud/__init__.py +13 -0
- veadk/cloud/cloud_agent_engine.py +144 -0
- veadk/cloud/cloud_app.py +123 -0
- veadk/cloud/template/app.py +30 -0
- veadk/cloud/template/config.py +55 -0
- veadk/config.py +131 -0
- veadk/consts.py +17 -0
- veadk/database/__init__.py +17 -0
- veadk/database/base_database.py +45 -0
- veadk/database/database_factory.py +80 -0
- veadk/database/kv/__init__.py +13 -0
- veadk/database/kv/redis_database.py +109 -0
- veadk/database/local_database.py +43 -0
- veadk/database/relational/__init__.py +13 -0
- veadk/database/relational/mysql_database.py +114 -0
- veadk/database/vector/__init__.py +13 -0
- veadk/database/vector/opensearch_vector_database.py +205 -0
- veadk/database/vector/type.py +50 -0
- veadk/database/viking/__init__.py +13 -0
- veadk/database/viking/viking_database.py +378 -0
- veadk/database/viking/viking_memory_db.py +521 -0
- veadk/evaluation/__init__.py +17 -0
- veadk/evaluation/adk_evaluator/__init__.py +13 -0
- veadk/evaluation/adk_evaluator/adk_evaluator.py +291 -0
- veadk/evaluation/base_evaluator.py +242 -0
- veadk/evaluation/deepeval_evaluator/__init__.py +17 -0
- veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +223 -0
- veadk/evaluation/eval_set_file_loader.py +28 -0
- veadk/evaluation/eval_set_recorder.py +91 -0
- veadk/evaluation/utils/prometheus.py +142 -0
- veadk/knowledgebase/__init__.py +17 -0
- veadk/knowledgebase/knowledgebase.py +83 -0
- veadk/knowledgebase/knowledgebase_database_adapter.py +259 -0
- veadk/memory/__init__.py +13 -0
- veadk/memory/long_term_memory.py +119 -0
- veadk/memory/memory_database_adapter.py +235 -0
- veadk/memory/short_term_memory.py +124 -0
- veadk/memory/short_term_memory_processor.py +90 -0
- veadk/prompts/__init__.py +13 -0
- veadk/prompts/agent_default_prompt.py +30 -0
- veadk/prompts/prompt_evaluator.py +20 -0
- veadk/prompts/prompt_memory_processor.py +55 -0
- veadk/prompts/prompt_optimization.py +158 -0
- veadk/runner.py +252 -0
- veadk/tools/__init__.py +13 -0
- veadk/tools/builtin_tools/__init__.py +13 -0
- veadk/tools/builtin_tools/lark.py +67 -0
- veadk/tools/builtin_tools/las.py +23 -0
- veadk/tools/builtin_tools/vesearch.py +49 -0
- veadk/tools/builtin_tools/web_scraper.py +76 -0
- veadk/tools/builtin_tools/web_search.py +192 -0
- veadk/tools/demo_tools.py +58 -0
- veadk/tools/load_knowledgebase_tool.py +144 -0
- veadk/tools/sandbox/__init__.py +13 -0
- veadk/tools/sandbox/browser_sandbox.py +27 -0
- veadk/tools/sandbox/code_sandbox.py +30 -0
- veadk/tools/sandbox/computer_sandbox.py +27 -0
- veadk/tracing/__init__.py +13 -0
- veadk/tracing/base_tracer.py +172 -0
- veadk/tracing/telemetry/__init__.py +13 -0
- veadk/tracing/telemetry/exporters/__init__.py +13 -0
- veadk/tracing/telemetry/exporters/apiserver_exporter.py +60 -0
- veadk/tracing/telemetry/exporters/apmplus_exporter.py +101 -0
- veadk/tracing/telemetry/exporters/base_exporter.py +28 -0
- veadk/tracing/telemetry/exporters/cozeloop_exporter.py +69 -0
- veadk/tracing/telemetry/exporters/inmemory_exporter.py +88 -0
- veadk/tracing/telemetry/exporters/tls_exporter.py +78 -0
- veadk/tracing/telemetry/metrics/__init__.py +13 -0
- veadk/tracing/telemetry/metrics/opentelemetry_metrics.py +73 -0
- veadk/tracing/telemetry/opentelemetry_tracer.py +167 -0
- veadk/types.py +23 -0
- veadk/utils/__init__.py +13 -0
- veadk/utils/logger.py +59 -0
- veadk/utils/misc.py +33 -0
- veadk/utils/patches.py +85 -0
- veadk/utils/volcengine_sign.py +199 -0
- veadk/version.py +15 -0
- veadk_python-0.1.0.dist-info/METADATA +124 -0
- veadk_python-0.1.0.dist-info/RECORD +110 -0
- veadk_python-0.1.0.dist-info/WHEEL +5 -0
- veadk_python-0.1.0.dist-info/entry_points.txt +2 -0
- veadk_python-0.1.0.dist-info/licenses/LICENSE +201 -0
- veadk_python-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import time
|
|
16
|
+
|
|
17
|
+
from deepeval import evaluate
|
|
18
|
+
from deepeval.evaluate.types import EvaluationResult
|
|
19
|
+
from deepeval.key_handler import KEY_FILE_HANDLER, KeyValues
|
|
20
|
+
from deepeval.metrics import BaseMetric
|
|
21
|
+
from deepeval.models import LocalModel
|
|
22
|
+
from deepeval.test_case import LLMTestCase
|
|
23
|
+
from deepeval.test_case.llm_test_case import ToolCall
|
|
24
|
+
from typing_extensions import override
|
|
25
|
+
|
|
26
|
+
from veadk.config import getenv
|
|
27
|
+
from veadk.utils.logger import get_logger
|
|
28
|
+
|
|
29
|
+
from ..base_evaluator import BaseEvaluator, EvalResultData, MetricResult
|
|
30
|
+
from ..utils.prometheus import (
|
|
31
|
+
EvalResultCaseData,
|
|
32
|
+
EvalResultMetadata,
|
|
33
|
+
PrometheusPushgatewayConfig,
|
|
34
|
+
push_to_prometheus,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
logger = get_logger(__name__)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def formatted_timestamp():
    """Return the current local time as a compact ``YYYYMMDDHHMMSS`` string."""
    now = time.localtime()
    return time.strftime("%Y%m%d%H%M%S", now)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class DeepevalEvaluator(BaseEvaluator):
    """Evaluate agent runs with Deepeval metrics.

    The judge model is registered with Deepeval's global key file as a
    "local model", so any OpenAI-compatible endpoint (e.g. Volcano Ark) can
    act as the judge. Results can optionally be exported to a Prometheus
    Pushgateway via ``prometheus_config``.
    """

    def __init__(
        self,
        agent,
        judge_model_api_key: str = getenv("MODEL_JUDGE_API_KEY"),
        judge_model_name: str = getenv(
            "MODEL_JUDGE_NAME",
            "doubao-seed-1-6-250615",
        ),
        judge_model_api_base: str = getenv(
            "MODEL_JUDGE_API_BASE",
            "https://ark.cn-beijing.volces.com/api/v3/",
        ),
        name: str = "veadk_deepeval_evaluator",
        prometheus_config: PrometheusPushgatewayConfig | None = None,
    ):
        """Create the evaluator and configure the Deepeval judge model.

        Args:
            agent: The agent under evaluation (forwarded to ``BaseEvaluator``).
            judge_model_api_key: API key of the judge-model endpoint.
            judge_model_name: Model name used for judging.
            judge_model_api_base: OpenAI-compatible base URL of the judge.
            name: Evaluator name.
            prometheus_config: When set, results are pushed to this gateway.
        """
        super().__init__(agent=agent, name=name)

        self.judge_model_name = judge_model_name
        self.judge_model = self.create_judge_model(
            model_name=judge_model_name,
            api_key=judge_model_api_key,
            api_base=judge_model_api_base,
        )

        self.prometheus_config = prometheus_config

    def create_judge_model(
        self,
        model_name: str,
        api_key: str,
        api_base: str,
    ):
        """Register the judge endpoint in Deepeval's key file and return it.

        NOTE: Deepeval reads "local model" settings from a process-global key
        file, so this mutates Deepeval configuration for the whole process.
        """
        KEY_FILE_HANDLER.write_key(KeyValues.LOCAL_MODEL_NAME, model_name)
        KEY_FILE_HANDLER.write_key(KeyValues.LOCAL_MODEL_BASE_URL, api_base)
        KEY_FILE_HANDLER.write_key(KeyValues.LOCAL_MODEL_API_KEY, api_key)
        KEY_FILE_HANDLER.write_key(KeyValues.USE_LOCAL_MODEL, "YES")
        KEY_FILE_HANDLER.write_key(KeyValues.USE_AZURE_OPENAI, "NO")
        return LocalModel()

    @override
    async def eval(
        self,
        eval_set_file_path: str,
        metrics: list[BaseMetric],
        eval_id: str | None = None,
    ):
        """Target to Google ADK, we will use the same evaluation case format as Google ADK.

        Args:
            eval_set_file_path: Path to an ADK-format eval set file.
            metrics: Deepeval metrics to score each test case with.
            eval_id: Identifier of this run. Defaults to a fresh
                ``test_<timestamp>`` per call. (Bugfix: the previous default
                ``f"test_{formatted_timestamp()}"`` was evaluated once at
                import time, so every run shared the same id.)

        Returns:
            The Deepeval ``EvaluationResult`` of this run.
        """
        if eval_id is None:
            eval_id = f"test_{formatted_timestamp()}"

        # Get evaluation data by parsing eval set file
        self.generate_eval_data(eval_set_file_path)
        # Get actual data by running agent
        logger.info("Start to run agent for actual data.")
        await self._run_agent_for_actual_data()
        eval_case_data_list = self.invocation_list

        # Build test cases in Deepeval format
        logger.info("Start to build test cases in Deepeval format.")
        test_cases = []
        for eval_case_data in eval_case_data_list:
            # Conversation history accumulated across the invocations of one
            # eval case, one `{"role": ..., "content": ...}` line per turn.
            # Bugfix: these were previously re-initialized inside the
            # per-invocation loop, so the history appended below was dead and
            # every test case saw "Empty" context. Initializing once per eval
            # case lets each test case see the turns that preceded it.
            invocations_context_actual: str = ""
            invocations_context_expect: str = ""

            for invocation in eval_case_data.invocations:
                test_case = LLMTestCase(
                    input=invocation.input,
                    actual_output=invocation.actual_output,
                    expected_output=invocation.expected_output,
                    tools_called=[
                        ToolCall(name=tool["name"], input_parameters=tool["args"])
                        for tool in invocation.actual_tool
                    ],
                    expected_tools=[
                        ToolCall(name=tool["name"], input_parameters=tool["args"])
                        for tool in invocation.expected_tool
                    ],
                    additional_metadata={"latency": invocation.latency},
                    context=[
                        "actual_conversation_history: "
                        + (invocations_context_actual or "Empty"),
                        "expect_conversation_history: "
                        + (invocations_context_expect or "Empty"),
                    ],
                )
                # Record this turn so later invocations see it as history.
                invocations_context_actual += (
                    f'{{"role": "user", "content": "{invocation.input}"}}\n'
                )
                invocations_context_actual += f'{{"role": "assistant", "content": "{invocation.actual_output}"}}\n'
                invocations_context_expect += (
                    f'{{"role": "user", "content": "{invocation.input}"}}\n'
                )
                invocations_context_expect += f'{{"role": "assistant", "content": "{invocation.expected_output}"}}\n'

                test_cases.append(test_case)

        # Run Deepeval evaluation according to metrics
        logger.info("Start to run Deepeval evaluation according to metrics.")
        test_results = evaluate(test_cases=test_cases, metrics=metrics)
        for test_result in test_results.test_results:
            eval_result_data = EvalResultData(metric_results=[])
            for metrics_data_item in test_result.metrics_data:
                metric_result = MetricResult(
                    metric_type=metrics_data_item.name,
                    success=metrics_data_item.success,
                    score=metrics_data_item.score,
                    reason=metrics_data_item.reason,
                )
                eval_result_data.metric_results.append(metric_result)

            eval_result_data.call_before_append()  # calculate average score and generate total reason
            self.result_list.append(eval_result_data)
        self.result_list.reverse()  # deepeval test_results is in reverse order

        # export to Prometheus if needed
        if self.prometheus_config is not None:
            self.export_results(eval_id, test_results)

        return test_results

    def export_results(self, eval_id: str, test_results: EvaluationResult):
        """Push per-case results and run metadata to a Prometheus Pushgateway.

        Args:
            eval_id: Grouping key (`test_name`) for this run in Prometheus.
            test_results: Deepeval results returned by ``evaluate``.
        """
        # fixed attributions
        test_name = eval_id
        test_cases_total = len(test_results.test_results)
        eval_data = EvalResultMetadata(
            tested_model=self.agent.model_name,
            judge_model=self.judge_model_name,
        )
        # parsed attributions
        test_cases_failure = 0
        test_cases_pass = 0
        test_data_list = []
        # NOTE: we hard-coding the following two attributions for development
        case_threshold = 0.5
        diff_threshold = 0.2

        for idx, test_result in enumerate(test_results.test_results):
            pass_flag = "PASSED"
            if test_result.success:
                test_cases_pass += 1
            else:
                pass_flag = "FAILURE"
                test_cases_failure += 1

            test_data_list.append(
                EvalResultCaseData(
                    id=str(idx),
                    input=test_result.input,
                    actual_output=test_result.actual_output,
                    expected_output=test_result.expected_output,
                    # [temporary] score: This method is not generally applicable now and is currently only available in the GEval mode.
                    score=str(test_result.metrics_data[0].score),
                    reason=test_result.metrics_data[0].reason,
                    status=pass_flag,
                    latency=test_result.additional_metadata["latency"],
                )
            )

        exported_data = {
            "test_name": test_name,
            "test_cases_total": test_cases_total,
            "test_cases_failure": test_cases_failure,
            "test_cases_pass": test_cases_pass,
            "test_data_list": test_data_list,
            "eval_data": eval_data,
            "case_threshold": case_threshold,
            "diff_threshold": diff_threshold,
        }

        push_to_prometheus(
            **exported_data,
            url=self.prometheus_config.url,
            username=self.prometheus_config.username,
            password=self.prometheus_config.password,
        )
        logger.info(
            f"Upload to Prometheus Pushgateway ({self.prometheus_config.url}) successfully! Test name: {eval_id}"
        )
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from google.adk.evaluation.eval_set import EvalSet
|
|
16
|
+
from google.adk.evaluation.local_eval_sets_manager import (
|
|
17
|
+
load_eval_set_from_file as adk_load_eval_set_from_file,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def load_eval_set_from_file(eval_set_file_path: str) -> EvalSet:
    """Load an ADK eval set from ``eval_set_file_path``.

    The file path is also passed as the eval-set id expected by the ADK
    loader. A chained ``Exception`` is raised when loading fails.
    """
    try:
        return adk_load_eval_set_from_file(eval_set_file_path, eval_set_file_path)
    except Exception as e:
        raise Exception(
            f"Failed to load eval set from file {eval_set_file_path}"
        ) from e
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import time
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
from google.adk.cli.utils import evals
|
|
19
|
+
from google.adk.evaluation.eval_case import EvalCase, SessionInput
|
|
20
|
+
from google.adk.evaluation.local_eval_sets_manager import LocalEvalSetsManager
|
|
21
|
+
from google.adk.sessions import BaseSessionService
|
|
22
|
+
|
|
23
|
+
from veadk.utils.logger import get_logger
|
|
24
|
+
from veadk.utils.misc import formatted_timestamp
|
|
25
|
+
|
|
26
|
+
logger = get_logger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class EvalSetRecorder(LocalEvalSetsManager):
    """Records live sessions into an ADK eval-set file stored under ``/tmp/``."""

    def __init__(
        self, session_service: BaseSessionService, eval_set_id: str = "default"
    ):
        """Create a recorder bound to a session service.

        Args:
            session_service: Source of the sessions to be converted.
            eval_set_id: Target eval-set id; an empty string falls back to
                ``"default"``.
        """
        # LocalEvalSetsManager persists eval sets below `agents_dir`.
        super().__init__(agents_dir="/tmp/")
        self.eval_set_id = eval_set_id if eval_set_id != "" else "default"
        self.session_service: BaseSessionService = session_service

    # adapted from google.adk.cli.fast_api
    async def add_session_to_eval_set(
        self,
        app_name: str,
        eval_set_id: str,
        session_id: str,
        user_id: str,
    ):
        """Convert one stored session into an eval case and append it.

        Raises:
            ValueError: If the underlying ``add_eval_case`` rejects the case
                (original error is chained for debugging).
        """
        eval_id = f"veadk_eval_{formatted_timestamp()}"

        # Get the session
        session = await self.session_service.get_session(
            app_name=app_name, user_id=user_id, session_id=session_id
        )
        assert session, "Session not found."

        # Convert the session data to eval invocations
        invocations = evals.convert_session_to_eval_invocations(session)

        # Populate the session with initial session state.
        # initial_session_state = create_empty_state(agent_loader.load_agent(app_name))

        new_eval_case = EvalCase(
            eval_id=eval_id,
            conversation=invocations,
            session_input=SessionInput(app_name=app_name, user_id=user_id),
            creation_timestamp=time.time(),
        )

        try:
            self.add_eval_case(app_name, eval_set_id, new_eval_case)
        except ValueError as ve:
            # Chain the original error so the full traceback is preserved.
            raise ValueError(f"Add eval case to eval set error: {ve}") from ve

    async def dump(
        self,
        app_name: str,
        user_id: str,
        session_id: str,
    ) -> str:
        """Persist the given session as an eval-set file and return its path."""
        dump_path = self._get_eval_set_file_path(app_name, self.eval_set_id)
        Path(dump_path).parent.mkdir(parents=True, exist_ok=True)

        self.create_eval_set(app_name=app_name, eval_set_id=self.eval_set_id)

        await self.add_session_to_eval_set(
            app_name=app_name,
            eval_set_id=self.eval_set_id,
            session_id=session_id,
            user_id=user_id,
        )

        logger.info(f"Dump eval set to {dump_path}")

        return dump_path
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
|
|
17
|
+
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
|
|
18
|
+
from prometheus_client.exposition import basic_auth_handler
|
|
19
|
+
|
|
20
|
+
from veadk.config import getenv
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class EvalResultCaseData:
    """Per-test-case result payload exported to Prometheus as a label value."""

    # Case index within the evaluation run, stringified.
    id: str
    input: str
    actual_output: str
    expected_output: str
    # Stringified score of the first metric of the case.
    score: str
    reason: str
    status: str  # `PASSED` or `FAILURE`
    latency: str
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class EvalResultMetadata:
    """Run-level metadata exported to Prometheus alongside the case data."""

    # Name of the model under evaluation.
    tested_model: str
    # Name of the judge model scoring the outputs.
    judge_model: str
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class PrometheusPushgatewayConfig:
    """Connection settings for a Prometheus Pushgateway.

    Defaults are read from environment variables when the module is
    imported. Declared as a dataclass (consistent with the other config
    types in this module) so explicit values can be passed per instance,
    e.g. ``PrometheusPushgatewayConfig(url=..., username=..., password=...)``;
    no-argument construction keeps the previous behavior.
    """

    url: str = getenv(
        "OBSERVABILITY_PROMETHEUS_PUSHGATEWAY_URL",
    )
    username: str = getenv("OBSERVABILITY_PROMETHEUS_USERNAME")
    password: str = getenv("OBSERVABILITY_PROMETHEUS_PASSWORD")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Shared registry: every evaluation metric below is pushed to the gateway in
# a single request through this registry.
registry = CollectorRegistry()

test_cases_total_metric = Gauge(
    "test_cases_total",
    "Total number of test cases in this evaluation",
    registry=registry,
)

test_cases_success_metric = Gauge(
    "test_cases_success", "Success number of test cases", registry=registry
)

test_cases_pass_metric = Gauge(
    "test_cases_pass", "Passed number of test cases", registry=registry
)

# Help-string typo fixed: "Failuer" -> "Failure".
test_cases_failure_metric = Gauge(
    "test_cases_failure", "Failure number of test cases", registry=registry
)

case_threshold_metric = Gauge("threshold", "Threshold of test cases", registry=registry)
diff_threshold_metric = Gauge(
    "diff_threshold", "Diff threshold of test cases", registry=registry
)

# Free-form payload metrics: the actual data travels in the `data` label;
# the gauge value itself is a constant 1.
test_cases_data_metric = Gauge(
    "test_cases_data",
    "Specific data of test cases",
    registry=registry,
    labelnames=["data"],
)

eval_data_metric = Gauge(
    "eval_data",
    "Specific data of evaluation",
    registry=registry,
    labelnames=["data"],
)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def post_pushgateway(
    pushgateway_url: str,
    username: str,
    password: str,
    job_name: str,
    registry: CollectorRegistry,
    grouping_key: dict[str, str] | None = None,
):
    """Push *registry* to a Prometheus Pushgateway using HTTP basic auth.

    Args:
        pushgateway_url: Base URL of the Pushgateway.
        username: Basic-auth user name.
        password: Basic-auth password.
        job_name: Pushgateway ``job`` grouping label.
        registry: Collector registry holding the metrics to push.
        grouping_key: Extra grouping labels, e.g. ``{"test_name": ...}``.
    """

    def auth_handler(url, method, timeout, headers, data):
        # Inject the credentials into prometheus_client's basic-auth handler.
        return basic_auth_handler(
            url, method, timeout, headers, data, username, password
        )

    push_to_gateway(
        gateway=pushgateway_url,
        job=job_name,
        registry=registry,
        grouping_key=grouping_key,
        handler=auth_handler,
    )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def push_to_prometheus(
    test_name: str,
    test_cases_total: int,
    test_cases_failure: int,
    test_cases_pass: int,
    test_data_list: list[EvalResultCaseData],
    eval_data: EvalResultMetadata,
    case_threshold: float = 0.5,
    diff_threshold: float = 0.2,
    url: str = "",
    username: str = "",
    password: str = "",
):
    """Populate the module-level gauges and push them to the Pushgateway.

    Case payloads and the evaluation metadata are serialized into the
    ``data`` label of their respective gauges; the gauge value is always 1.
    """
    # Aggregate counters for the run.
    test_cases_total_metric.set(test_cases_total)
    test_cases_failure_metric.set(test_cases_failure)
    test_cases_pass_metric.set(test_cases_pass)

    # One labeled sample per test case.
    for case_data in test_data_list:
        test_cases_data_metric.labels(data=str(case_data.__dict__)).set(1)

    eval_data_metric.labels(data=str(eval_data.__dict__)).set(1)

    # Threshold gauges.
    case_threshold_metric.set(case_threshold)
    diff_threshold_metric.set(diff_threshold)

    # Single push of the shared registry, grouped by test name.
    post_pushgateway(
        pushgateway_url=url,
        username=username,
        password=password,
        job_name="veadk_eval_job",
        registry=registry,
        grouping_key={"test_name": test_name},
    )
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .knowledgebase import KnowledgeBase
|
|
16
|
+
|
|
17
|
+
__all__ = ["KnowledgeBase"]
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import BinaryIO, Literal, TextIO
|
|
16
|
+
|
|
17
|
+
from veadk.database.database_factory import DatabaseFactory
|
|
18
|
+
from veadk.utils.logger import get_logger
|
|
19
|
+
|
|
20
|
+
from .knowledgebase_database_adapter import get_knowledgebase_adapter
|
|
21
|
+
|
|
22
|
+
logger = get_logger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class KnowledgeBase:
    """Facade over a pluggable database backend used as a knowledge store."""

    def __init__(
        self,
        backend: Literal["local", "opensearch", "viking", "redis", "mysql"] = "local",
        top_k: int = 5,
        db_config=None,
    ):
        """Create a knowledgebase on the given backend.

        Args:
            backend: Storage backend identifier.
            top_k: Default number of documents returned by ``search``.
            db_config: Backend-specific configuration forwarded to
                ``DatabaseFactory.create``.
        """
        logger.debug(f"Create knowledgebase, backend is {backend}")
        self.backend = backend
        self.top_k = top_k

        self.db_client = DatabaseFactory.create(backend=backend, config=db_config)
        # The adapter maps the generic add/query/delete API onto the backend client.
        self.adapter = get_knowledgebase_adapter(backend)(
            database_client=self.db_client
        )

    def add(
        self,
        data: str | list[str] | TextIO | BinaryIO | bytes,
        app_name: str,
        **kwargs,
    ):
        """
        Add documents to the vector database.
        You can only upload files or file characters when the adapter type you use is vikingdb.
        In addition, if you upload data of the bytes type,
        for example, if you read the file stream of a pdf, then you need to pass an additional parameter file_ext = '.pdf'.
        """
        kwargs.pop("session_id", None)  # remove session_id
        # NOTE(review): user_id/session_id are fixed placeholder strings, so
        # stored documents are scoped by app_name only — confirm this is the
        # intended isolation level.
        self.adapter.add(
            data, app_name, user_id="user_id", session_id="session_id", **kwargs
        )

    def search(self, query: str, app_name: str, top_k: int | None = None) -> list[str]:
        """Retrieve documents similar to the query text in the vector database.

        Args:
            query (str): The query text to be retrieved (e.g., "Who proposed the Turing machine model?")
            app_name (str): The name of the application.
            top_k (int | None): Per-call override of the instance default
                result count. (Annotation fixed: the default is ``None``.)

        Returns:
            list[str]: A list of the top most similar document contents retrieved (sorted by vector similarity)
        """
        top_k = self.top_k if top_k is None else top_k

        result = self.adapter.query(
            query=query, app_name=app_name, user_id="user_id", top_k=top_k
        )
        if len(result) == 0:
            logger.warning(f"No documents found in knowledgebase. Query: {query}")
        return result

    def delete(self, app_name: str, user_id: str, session_id: str):
        """Delete documents in the vector database.

        Args:
            app_name (str): The name of the application
            user_id (str): The user ID
            session_id (str): The session ID
        """
        self.adapter.delete(app_name=app_name, user_id=user_id, session_id=session_id)
|