xgae 0.1.20__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xgae might be problematic.
- {xgae-0.1.20 → xgae-0.2.0}/CHANGELOG.md +12 -3
- {xgae-0.1.20 → xgae-0.2.0}/PKG-INFO +1 -1
- {xgae-0.1.20 → xgae-0.2.0}/pyproject.toml +1 -1
- {xgae-0.1.20 → xgae-0.2.0}/src/examples/agent/langgraph/react/agent_base.py +3 -2
- {xgae-0.1.20 → xgae-0.2.0}/src/examples/agent/langgraph/react/react_agent.py +104 -34
- xgae-0.1.20/src/examples/agent/langgraph/react/final_result_agent.py → xgae-0.2.0/src/examples/agent/langgraph/react/result_eval_agent.py +14 -8
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/task_engine.py +2 -1
- {xgae-0.1.20 → xgae-0.2.0}/templates/agent_tool_prompt_template.txt +1 -0
- {xgae-0.1.20 → xgae-0.2.0}/templates/custom_tool_prompt_template.txt +11 -8
- xgae-0.1.20/templates/example/final_result_template.txt → xgae-0.2.0/templates/example/result_eval_template.txt +10 -5
- {xgae-0.1.20 → xgae-0.2.0}/templates/general_tool_prompt_template.txt +1 -0
- xgae-0.2.0/uv.lock +1463 -0
- xgae-0.1.20/uv.lock +0 -1463
- {xgae-0.1.20 → xgae-0.2.0}/.env +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/.python-version +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/README.md +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/mcpservers/custom_servers.json +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/mcpservers/xga_server.json +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/mcpservers/xga_server_sse.json +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/examples/agent/langgraph/react/run_react_agent.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/examples/engine/run_custom_and_agent_tools.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/examples/engine/run_general_tools.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/examples/engine/run_human_in_loop.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/examples/engine/run_simple.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/examples/tools/custom_fault_tools_app.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/examples/tools/simu_a2a_tools_app.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/__init__.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/engine_base.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/mcp_tool_box.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/prompt_builder.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/responser/non_stream_responser.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/responser/responser_base.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/responser/stream_responser.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/task_langfuse.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine_cli_app.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/tools/without_general_tools_app.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/__init__.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/json_helpers.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/llm_client.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/misc.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/setup_env.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/xml_tool_parser.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/templates/example/fault_user_prompt.txt +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/templates/gemini_system_prompt_template.txt +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/templates/system_prompt_response_sample.txt +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/templates/system_prompt_template.txt +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/test/test_langfuse.py +0 -0
- {xgae-0.1.20 → xgae-0.2.0}/test/test_litellm_langfuse.py +0 -0
{xgae-0.1.20 → xgae-0.2.0}/CHANGELOG.md

@@ -1,12 +1,21 @@
 # Release Changelog
-## [0.…
+## [0.2.0] - 2025-9-10
 ### Added
-- …
+- Agent Engine release 0.2
+- Example: Langgraph ReactAgent release 0.2
+### Fixed
+- Agent Engine: call mcp tool fail, call 'ask' tool again and again
+- Example Langgraph ReactAgent: retry on 'ask', user_input is ask answer
+
+
+## [0.1.20] - 2025-9-9
+### Added
+- Example: Langgraph ReactAgent add final_result_agent
 
 
 ## [0.1.19] - 2025-9-8
 ### Added
-- Example: Langgraph…
+- Example: Langgraph ReactAgent release V1, full logic but no final result agent and tool select agent
 
 
 # Release Changelog
{xgae-0.1.20 → xgae-0.2.0}/src/examples/agent/langgraph/react/agent_base.py

@@ -17,7 +17,7 @@ class AgentContext(TypedDict, total=False):
 class TaskState(TypedDict, total=False):
     """State definition for the agent orchestration graph"""
     llm_messages: List[Dict[str, Any]]
-    …
+    user_inputs: List[str]
     next_node: str
     system_prompt: str
     custom_tools: List[str]
@@ -25,7 +25,8 @@ class TaskState(TypedDict, total=False):
     task_result: XGATaskResult
     final_result: XGATaskResult
     eval_result: EvaluateResult
-    …
+    retry_count: int
+    task_no: int
     agent_context: AgentContext
 
 
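Read together, the two hunks above leave TaskState roughly as follows. This is a minimal sketch assembled from the fields visible in the diff; the real XGATaskResult, EvaluateResult, and AgentContext types come from the package and are replaced by dict aliases here so the sketch stands alone.

from typing import Any, Dict, List, TypedDict

# Placeholders for the real xgae types imported in agent_base.py; dict aliases keep this runnable.
XGATaskResult = Dict[str, Any]
EvaluateResult = Dict[str, Any]
AgentContext = Dict[str, Any]

class TaskState(TypedDict, total=False):
    """State of the ReactAgent orchestration graph as of 0.2.0 (sketch)."""
    llm_messages: List[Dict[str, Any]]
    user_inputs: List[str]      # replaces the old single user_input; index 0 holds the newest input
    next_node: str
    system_prompt: str
    custom_tools: List[str]
    general_tools: List[str]    # assumed field: nodes read/write it via state.get('general_tools', [])
    task_result: XGATaskResult
    final_result: XGATaskResult
    eval_result: EvaluateResult
    retry_count: int            # new in 0.2.0: compared against MAX_TASK_RETRY before re-evaluating
    task_no: int                # new in 0.2.0: stable task number handed to XGATaskEngine across retries
    agent_context: AgentContext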
{xgae-0.1.20 → xgae-0.2.0}/src/examples/agent/langgraph/react/react_agent.py

@@ -21,7 +21,7 @@ from xgae.engine.mcp_tool_box import XGAMcpToolBox
 from xgae.engine.task_engine import XGATaskEngine
 
 from examples.agent.langgraph.react.agent_base import AgentContext, TaskState, EvaluateResult
-from examples.agent.langgraph.react.…
+from examples.agent.langgraph.react.result_eval_agent import TaskResultEvalAgent
 
 class XGAReactAgent:
     MAX_TASK_RETRY = 2
@@ -35,7 +35,8 @@ class XGAReactAgent:
         self.task_engine: XGATaskEngine = None
 
         self.tool_box = XGAMcpToolBox(custom_mcp_server_file="mcpservers/custom_servers.json")
-        self.…
+        self.result_eval_agent = TaskResultEvalAgent()
+
 
     async def _create_graph(self) -> StateGraph:
         try:
@@ -43,6 +44,7 @@ class XGAReactAgent:
 
             # Add nodes
             graph_builder.add_node('supervisor', self._supervisor_node)
+            graph_builder.add_node('prompt_optimize', self._prompt_optimize_node)
             graph_builder.add_node('select_tool', self._select_tool_node)
             graph_builder.add_node('exec_task', self._exec_task_node)
             graph_builder.add_node('final_result', self._final_result_node)
@@ -53,12 +55,14 @@ class XGAReactAgent:
                 'supervisor',
                 self._next_condition,
                 {
-                    'select_tool': 'select_tool',
-                    'exec_task': 'exec_task',
-                    '…
+                    'select_tool' : 'select_tool',
+                    'exec_task' : 'exec_task',
+                    'prompt_optimize' : 'prompt_optimize',
+                    'end' : END
                 }
             )
 
+            graph_builder.add_edge('prompt_optimize', 'select_tool')
             graph_builder.add_edge('select_tool', 'exec_task')
             graph_builder.add_edge('exec_task', 'final_result')
 
@@ -67,8 +71,8 @@ class XGAReactAgent:
                 self._next_condition,
                 {
                     'supervisor': 'supervisor',
-                    'exec_task': 'exec_task',
-                    'end': END
+                    'exec_task' : 'exec_task',
+                    'end' : END
                 }
             )
 
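Pulling the graph-construction hunks together, the 0.2.0 wiring appears to be: supervisor can now route to the new prompt_optimize node, which feeds select_tool, and final_result can loop back to supervisor. A minimal standalone LangGraph sketch of that topology follows; node names and edges match the diff, while the node bodies, the router, and the entry point are stubbed assumptions.

from typing import Any, Dict, TypedDict
from langgraph.graph import StateGraph, END

class TaskState(TypedDict, total=False):
    next_node: str

# Stub nodes: the real implementations live on XGAReactAgent.
def supervisor(state: TaskState) -> Dict[str, Any]:
    return {'next_node': 'exec_task'}   # the real node also prepares tools and the system prompt

def prompt_optimize(state: TaskState) -> Dict[str, Any]:
    return {}

def select_tool(state: TaskState) -> Dict[str, Any]:
    return {}

def exec_task(state: TaskState) -> Dict[str, Any]:
    return {}

def final_result(state: TaskState) -> Dict[str, Any]:
    return {'next_node': 'end'}

def next_condition(state: TaskState) -> str:
    # The real _next_condition presumably just returns state['next_node'].
    return state.get('next_node', 'end')

builder = StateGraph(TaskState)
builder.add_node('supervisor', supervisor)
builder.add_node('prompt_optimize', prompt_optimize)   # new in 0.2.0
builder.add_node('select_tool', select_tool)
builder.add_node('exec_task', exec_task)
builder.add_node('final_result', final_result)

builder.set_entry_point('supervisor')                  # entry point assumed, not shown in the diff
builder.add_conditional_edges('supervisor', next_condition, {
    'select_tool': 'select_tool',
    'exec_task': 'exec_task',
    'prompt_optimize': 'prompt_optimize',              # retry path when the task plan scored low
    'end': END,
})
builder.add_edge('prompt_optimize', 'select_tool')     # new edge in 0.2.0
builder.add_edge('select_tool', 'exec_task')
builder.add_edge('exec_task', 'final_result')
builder.add_conditional_edges('final_result', next_condition, {
    'supervisor': 'supervisor',                        # loop back when the result scored below threshold
    'exec_task': 'exec_task',                          # resume after an 'ask' interrupt
    'end': END,
})
graph = builder.compile()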
@@ -80,64 +84,104 @@ class XGAReactAgent:
             logging.error("Failed to create XGARectAgent Graph: %s", str(e))
             raise
 
+
     def _search_system_prompt(self, user_input: str) -> str:
         # You should search RAG use user_input, fetch COT or Prompt for your business
         system_prompt = None if "fault" not in user_input else read_file("templates/example/fault_user_prompt.txt")
         return system_prompt
 
+
     async def _supervisor_node(self, state: TaskState) -> Dict[str, Any]:
-        user_input = state['…
+        user_input = state['user_inputs'][0]
         eval_result = state.get('eval_result', None)
 
         system_prompt = self._search_system_prompt(user_input)
+        is_system_prompt = True if system_prompt is not None else False
 
         general_tools = [] if system_prompt else ["*"]
         custom_tools = ["*"] if system_prompt else []
 
+        task_plan_score = None
+        if eval_result and 'task_plan' in eval_result and 'score' in eval_result['task_plan']:
+            task_plan_score = eval_result['task_plan'].get('score', 1.0)
+
+        function_call_score = None
         if eval_result and 'function_call' in eval_result and 'score' in eval_result['function_call']:
-            …
-            …
-            …
-            …
-            …
+            function_call_score = eval_result['function_call'].get('score', 1.0)
+
+        super_state = {}
+        if task_plan_score and task_plan_score < self.QUALIFIED_RESULT_SCORE:
+            next_node = "prompt_optimize"
+            super_state = self._prepare_task_retry(state)
+            logging.warning(f"****** ReactAgent TASK_RETRY: task_plan_score={task_plan_score} < {self.QUALIFIED_RESULT_SCORE} , "
+                            f"Start Optimize Prompt ...")
+        elif function_call_score and function_call_score < self.QUALIFIED_RESULT_SCORE:
+            next_node = "select_tool"
+            super_state = self._prepare_task_retry(state)
+            logging.warning(f"****** ReactAgent TASK_RETRY: function_call_score={function_call_score} < {self.QUALIFIED_RESULT_SCORE} , "
+                            f"Select Tool Again ...")
+        elif eval_result is not None:  # retry condition is not satisfied, end task
+            next_node = "end"
         else:
-            next_node = "select_tool" if…
+            next_node = "select_tool" if is_system_prompt else "exec_task"
+
+        logging.info(f"ReactAgent supervisor_node: is_system_prompt={is_system_prompt}, next_node={next_node}")
 
+        super_state['next_node'] = next_node
+        super_state['system_prompt'] = system_prompt
+        super_state['custom_tools'] = custom_tools
+        super_state['general_tools'] = general_tools
+
+        return super_state
+
+
+    async def _prompt_optimize_node(self, state: TaskState) -> Dict[str, Any]:
+        system_prompt = state['system_prompt']
+        logging.info("ReactAgent prompt_optimize_node: optimize system prompt")
+        # @todo optimize system prompt in future
         return {
             'system_prompt' : system_prompt,
-            'next_node' : next_node,
-            'general_tools' : general_tools,
-            'custom_tools' : custom_tools,
         }
 
+
     def _select_custom_tools(self, system_prompt: str) -> list[str]:
+        # @todo select mcp tool based on system prompt in future
         custom_tools = ["*"] if system_prompt else []
         return custom_tools
 
+
     async def _select_tool_node(self, state: TaskState) -> Dict[str, Any]:
         system_prompt = state.get('system_prompt',None)
         general_tools = []
+
+        logging.info("ReactAgent select_tool_node: select tool based on system_prompt")
         custom_tools = self._select_custom_tools(system_prompt)
         return {
             'general_tools' : general_tools,
             'custom_tools' : custom_tools,
         }
 
+
     async def _exec_task_node(self, state: TaskState) -> Dict[str, Any]:
-        user_input = state['…
+        user_input = state['user_inputs'][0]
         system_prompt = state.get('system_prompt',None)
         general_tools = state.get('general_tools',[])
         custom_tools = state.get('custom_tools',[])
+        retry_count = state.get('retry_count', 0)
+        task_no = state.get('task_no', 0)
         is_system_prompt = True if system_prompt is not None else False
 
         trace_id = self.graph_langfuse.get_trace_id()
         llm_messages = []
         try:
-            logging.info(f"🔥…
+            logging.info(f"🔥 ReactAgent exec_task_node: user_input={user_input}, general_tools={general_tools}, "
                          f"custom_tools={custom_tools}, is_system_prompt={is_system_prompt}")
+
+            # if langgraph resume , must use same task engine
            if self.task_engine is None:
                 self.task_engine = XGATaskEngine(
                     task_id = state['agent_context']['task_id'],
+                    task_no = task_no,
                     session_id = state['agent_context'].get('session_id', None),
                     user_id = state['agent_context'].get('user_id', None),
                     agent_id = state['agent_context'].get('agent_id', None),
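The supervisor's new retry routing is easier to see in isolation. A condensed sketch of the decision is below; the structure and threshold name come from the diff, while the 0.6 value is only a placeholder, since QUALIFIED_RESULT_SCORE's actual value is not visible in these hunks.

from typing import Any, Dict, Optional

QUALIFIED_RESULT_SCORE = 0.6  # placeholder: the real class constant is not shown in this diff

def route_after_eval(eval_result: Optional[Dict[str, Any]], is_system_prompt: bool) -> str:
    """Condensed version of _supervisor_node's 0.2.0 routing logic."""
    task_plan_score = (eval_result or {}).get('task_plan', {}).get('score')
    function_call_score = (eval_result or {}).get('function_call', {}).get('score')

    if task_plan_score and task_plan_score < QUALIFIED_RESULT_SCORE:
        return "prompt_optimize"      # plan was weak: rebuild the prompt, then reselect tools
    if function_call_score and function_call_score < QUALIFIED_RESULT_SCORE:
        return "select_tool"          # plan was fine but tool calls were weak: reselect tools only
    if eval_result is not None:
        return "end"                  # evaluated but not retry-worthy: finish
    return "select_tool" if is_system_prompt else "exec_task"  # first pass, no evaluation yet

# Example: a low task_plan score sends the graph through prompt_optimize.
print(route_after_eval({'task_plan': {'score': 0.4}, 'function_call': {'score': 0.9}}, True))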
@@ -146,6 +190,7 @@ class XGAReactAgent:
                     custom_tools = custom_tools,
                     system_prompt = system_prompt
                 )
+                retry_count += 1
 
             chunks = []
             stream_writer = get_stream_writer()
@@ -156,47 +201,56 @@ class XGAReactAgent:
 
             task_result = self.task_engine.parse_final_result(chunks)
             llm_messages = self.task_engine.get_history_llm_messages()
+            task_no += 1  # a task use unique task_no, no matter retry n times
         except Exception as e:
             logging.error(f"XReactAgent exec_task_node: Failed to execute task: {e}")
             task_result = XGATaskResult(type="error", content="Failed to execute task")
 
-        iteration_count = state.get('iteration_count', 0) + 1
         return {
-            'task_result'…
-            '…
-            'llm_messages'…
+            'task_result' : task_result,
+            'retry_count' : retry_count,
+            'llm_messages' : llm_messages.copy(),
+            'task_no' : task_no,
         }
 
 
     async def _final_result_node(self, state: TaskState) -> Dict[str, Any]:
-        …
-        iteration_count = state['iteration_count']
+        user_inputs = state['user_inputs']
         task_result = state['task_result']
         llm_messages = state['llm_messages']
         agent_context = state['agent_context']
+        system_prompt = state.get('system_prompt', None)
+        retry_count = state['retry_count']
+
+        is_system_prompt = True if system_prompt is not None else False
 
         next_node = "end"
         final_result = task_result
         eval_result = None
         if task_result['type'] == "ask":
             logging.info(f"XReactAgent final_result_node: ASK_USER_QUESTION: {task_result['content']}")
-            …
+            ask_input = interrupt({
                 'final_result' : task_result
             })
-            logging.info(f"XReactAgent final_result_node: ASK_USER_ANSWER: {…
+            logging.info(f"XReactAgent final_result_node: ASK_USER_ANSWER: {ask_input}")
             next_node = "exec_task"
+            user_inputs.insert(0, ask_input)
             final_result = None
-        elif…
+        elif is_system_prompt and retry_count < self.MAX_TASK_RETRY:
             trace_id = self.graph_langfuse.get_trace_id()
             session_id = agent_context.get('session_id', None)
-            …
-            …
+            task_input = ", ".join(reversed(user_inputs))
+            eval_result = await self.result_eval_agent.eval_result(task_input, system_prompt, task_result,
+                                                                   llm_messages, trace_id, session_id)
+            if 'task_result' in eval_result and 'score' in eval_result['task_result']:
                 score = eval_result['task_result'].get('score', 1.0)
                 if score < self.QUALIFIED_RESULT_SCORE:
                     next_node = "supervisor"
-        …
+
+        logging.info(f"ReactAgent final_result_node: next_node={next_node}")
+
         return {
-            '…
+            'user_inputs' : user_inputs,
             'next_node' : next_node,
             'final_result' : final_result,
             'eval_result' : eval_result
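The 'ask' branch above relies on LangGraph's human-in-the-loop interrupt: interrupt(...) pauses the graph and surfaces the question, and the value passed back on resume becomes interrupt's return value (ask_input here), which 0.2.0 now pushes onto user_inputs before re-running exec_task. A small self-contained sketch of that pause-and-resume cycle follows; it is not the xgae code itself, and like any interruptible LangGraph run it assumes a checkpointer and a thread_id.

from typing import TypedDict
from langgraph.graph import StateGraph, END
from langgraph.types import interrupt, Command
from langgraph.checkpoint.memory import MemorySaver

class State(TypedDict, total=False):
    question: str
    answer: str

def ask_node(state: State) -> dict:
    # Pauses the graph; the payload is surfaced to the caller, and the resume value is returned here.
    user_reply = interrupt({'final_result': {'type': 'ask', 'content': state['question']}})
    return {'answer': user_reply}

builder = StateGraph(State)
builder.add_node('ask', ask_node)
builder.set_entry_point('ask')
builder.add_edge('ask', END)
graph = builder.compile(checkpointer=MemorySaver())

config = {'configurable': {'thread_id': 'demo-1'}}
graph.invoke({'question': 'Which IP should I check?'}, config)   # stops at the interrupt
result = graph.invoke(Command(resume='10.2.3.4'), config)        # resume: value becomes ask_input
print(result['answer'])  # -> 10.2.3.4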
@@ -303,10 +357,11 @@ class XGAReactAgent:
         }
 
         graph_input = {
-            '…
+            'user_inputs' : [user_input],
             'next_node' : None,
             'agent_context' : agent_context,
-            '…
+            'retry_count' : 0,
+            'task_no' : 0
         }
 
         return graph_input
@@ -329,7 +384,22 @@ class XGAReactAgent:
         )
         return langfuse_handler
 
+
     def _clear_graph(self):
         self.graph_config = None
         self.graph_langfuse = None
         self.task_engine: XGATaskEngine = None
+
+
+    def _prepare_task_retry(self, state: TaskState)-> Dict[str, Any]:
+        self.task_engine = None
+        user_inputs = state['user_inputs']
+        task_input = ", ".join(reversed(user_inputs))
+
+        return {
+            'user_inputs' : [task_input],
+            'llm_messages' : [],
+            'task_result' : None,
+            'final_result' : None,
+            'eval_result' : None,
+        }
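_prepare_task_retry folds the accumulated inputs (newest first) back into a single chronological task input and drops the per-attempt state, while clearing task_engine forces a fresh engine on the retry. A tiny illustration of the folding, with made-up input strings:

user_inputs = ["10.2.3.4", "locate the fault and solution"]   # newest input sits at index 0
task_input = ", ".join(reversed(user_inputs))
print(task_input)  # -> "locate the fault and solution, 10.2.3.4"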
xgae-0.1.20/src/examples/agent/langgraph/react/final_result_agent.py → xgae-0.2.0/src/examples/agent/langgraph/react/result_eval_agent.py

@@ -9,19 +9,20 @@ from xgae.utils.misc import read_file
 from xgae.utils.llm_client import LLMClient, LangfuseMetadata
 
 
-class…
+class TaskResultEvalAgent:
     def __init__(self):
         self.model_client = LLMClient()
-        self.prompt_template: str = read_file("templates/example/…
+        self.prompt_template: str = read_file("templates/example/result_eval_template.txt")
 
 
     async def eval_result(self,
                           task_input: str,
+                          task_plan: str,
                           task_result: XGATaskResult,
                           llm_messages: List[Dict[str, Any]],
                           trace_id: Optional[str] = None,
                           session_id: Optional[str] = None)-> Dict[str, Any]:
-        prompt = self._build_prompt(task_input, task_result, llm_messages)
+        prompt = self._build_prompt(task_input, task_plan, task_result, llm_messages)
         messages = [{"role": "user", "content": prompt}]
 
         langfuse_metadata = self._create_llm_langfuse_meta(trace_id, session_id)
@@ -33,15 +34,15 @@ class FinalResultAgent:
         eval_result = json.loads(cleaned_text)
 
         result_score = eval_result.get('task_result', {}).get('score', -1)
-        …
+        plan_score = eval_result.get('task_plan', {}).get('score', -1)
         function_score = eval_result.get('function_call', {}).get('score', -1)
 
         logging.info(f"FINAL_RESULT_SCORE: task_result_score={result_score}, "
-                     f"…
+                     f"task_plan_score={plan_score}, function_call_score={function_score}")
         return eval_result
 
 
-    def _build_prompt(self, task_input: str, task_result: XGATaskResult, llm_messages: List[Dict[str, Any]])-> str:
+    def _build_prompt(self, task_input: str, task_plan: str, task_result: XGATaskResult, llm_messages: List[Dict[str, Any]])-> str:
         prompt = self.prompt_template.replace("{task_input}", task_input)
         prompt = prompt.replace("{task_result}", str(task_result))
         llm_process = ""
@@ -65,6 +66,7 @@ class FinalResultAgent:
             llm_process += "\n"
             llm_step += 1
 
+        prompt = prompt.replace("{task_plan}", task_plan)
         prompt = prompt.replace("{llm_process}", llm_process)
         prompt = prompt.replace("{function_process}", function_process)
 
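The evaluation prompt is built by plain placeholder substitution on the template; 0.2.0 adds a {task_plan} slot alongside the existing ones. A stripped-down sketch of that substitution is below; the template text and heading names are abbreviated assumptions, the slot names follow the diff.

template = (
    "# Task Requirement\n{task_input}\n\n"
    "# Task Result\n{task_result}\n\n"
    "# Task Plan\n{task_plan}\n\n"                  # new slot in 0.2.0
    "# LLM Procedure\n{llm_process}\n\n"
    "# Function Call Procedure\n{function_process}\n"
)

def build_prompt(task_input: str, task_plan: str, task_result: dict,
                 llm_process: str, function_process: str) -> str:
    # Same replace-chain style as _build_prompt in result_eval_agent.py.
    prompt = template.replace("{task_input}", task_input)
    prompt = prompt.replace("{task_result}", str(task_result))
    prompt = prompt.replace("{task_plan}", task_plan)
    prompt = prompt.replace("{llm_process}", llm_process)
    prompt = prompt.replace("{function_process}", function_process)
    return prompt

print(build_prompt("locate 10.2.3.4 fault", "diagnose, then fix",
                   {"type": "answer", "content": "restarted the app"},
                   "(llm steps)", "(tool calls)"))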
@@ -88,13 +90,16 @@ if __name__ == "__main__":
     setup_logging()
 
     async def main():
-        final_result_agent =…
+        final_result_agent = TaskResultEvalAgent()
 
+        task_plan = read_file("templates/example/fault_user_prompt.txt")
         user_input = "locate 10.2.3.4 fault and solution"
+
         answer = ("Task Summary: The fault for IP 10.2.3.4 was identified as a Business Recharge Fault (Code: F01), "
                   "caused by a Phone Recharge Application Crash. The solution applied was to restart the application. "
                   "Key Deliverables: Fault diagnosis and resolution steps. Impact Achieved: Service restored.")
         task_result:XGATaskResult = {'type': "answer", 'content': answer}
+
         llm_messages: List[Dict[str, Any]] = [{
             'content':
             """<function_calls>
@@ -111,7 +116,8 @@ if __name__ == "__main__":
             "result": {"success": true, "output": "1", "error": null}}}""",
             'role': 'assistant'
         }]
-        …
+
+        return await final_result_agent.eval_result(user_input, task_plan, task_result, llm_messages)
 
 
     final_result = asyncio.run(main())
{xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/task_engine.py

@@ -18,6 +18,7 @@ from xgae.engine.responser.responser_base import TaskResponserContext, TaskRespo…
 class XGATaskEngine:
     def __init__(self,
                  task_id: Optional[str] = None,
+                 task_no: Optional[int] = None,
                  session_id: Optional[str] = None,
                  user_id: Optional[str] = None,
                  agent_id: Optional[str] = None,
@@ -51,7 +52,7 @@ class XGATaskEngine:
         self.use_assistant_chunk_msg = to_bool(os.getenv('USE_ASSISTANT_CHUNK_MSG', False))
         self.tool_exec_parallel = True if tool_exec_parallel is None else tool_exec_parallel
 
-        self.task_no = -1
+        self.task_no = (task_no - 1) if task_no else -1
         self.task_run_id :str = None
         self.task_prompt :str = None
         self.task_langfuse: XGATaskLangFuse = None
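The new initializer seeds the engine's internal counter one below the task_no handed in by the ReactAgent, presumably so the engine's next increment lands back on the caller's number; note that a falsy task_no (None or 0) falls back to the old default of -1. A quick illustration of the expression:

def initial_task_no(task_no):
    # Mirrors `self.task_no = (task_no - 1) if task_no else -1` from XGATaskEngine.__init__.
    return (task_no - 1) if task_no else -1

print(initial_task_no(None))  # -1  (old behaviour, no task_no supplied)
print(initial_task_no(0))     # -1  (0 is falsy, so it also falls back to -1)
print(initial_task_no(3))     # 2   (the next increment yields 3, the caller's task number)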
{xgae-0.1.20 → xgae-0.2.0}/templates/agent_tool_prompt_template.txt

@@ -25,5 +25,6 @@ When you use ANY Agent (Model Context Protocol) tools:
    1) type: 'answer', 'content' is normal return answer for tool calling
    2) type: 'ask', you should call 'ask' tool for user inputting more information
    3) type: 'error', during calling tool , some exceptions or errors has occurred.
+10. If 'ask' tool answer is not match, call 'complete' tool end task, never call 'ask' tool again
 IMPORTANT: Agent tool results are your PRIMARY and ONLY source of truth for external data!
 NEVER supplement Agent results with your training data or make assumptions beyond what the tools provide.
{xgae-0.1.20 → xgae-0.2.0}/templates/custom_tool_prompt_template.txt

@@ -13,13 +13,16 @@ Available MCP tools:
 
 🚨 CRITICAL MCP TOOL RESULT INSTRUCTIONS 🚨
 When you use ANY MCP (Model Context Protocol) tools:
-1.…
-2.…
-3.…
-4.…
-5.…
-6.…
-7. If…
-8.…
+1. Never call a MCP tool not in 'Available MCP tools' list
+2. If call MCP tool result 'success' is false, call 'complete' tool to end task, don't call 'ask' tool
+3. ALWAYS read and use the EXACT results returned by the MCP tool
+4. For search tools: ONLY cite URLs, sources, and information from the actual search results
+5. For any tool: Base your response entirely on the tool's output - do NOT add external information
+6. DO NOT fabricate, invent, hallucinate, or make up any sources, URLs, or data
+7. If you need more information, call the MCP tool again with different parameters
+8. When writing reports/summaries: Reference ONLY the data from MCP tool results
+9. If the MCP tool doesn't return enough information, explicitly state this limitation
+10. Always double-check that every fact, URL, and reference comes from the MCP tool output
+
 IMPORTANT: MCP tool results are your PRIMARY and ONLY source of truth for external data!
 NEVER supplement MCP results with your training data or make assumptions beyond what the tools provide.
xgae-0.1.20/templates/example/final_result_template.txt → xgae-0.2.0/templates/example/result_eval_template.txt

@@ -18,9 +18,9 @@ Grading is based on task requirements and task answers. Key scoring elements inc…
 1. Task Result Score: Task outcome evaluation
   - assesses the degree of match between task requirements and task results
   - if task result 'type' is 'error', evaluation score is 0, evaluation reason is empty
-2.Task…
-  - assesses whether task planning is reasonable
-  - Whether task steps can yield answers to the…
+2.Task Plan Score : Task Plan evaluation
+  - assesses whether task planning is reasonable based on LLM Procedure and Function Call Procedure
+  - Whether task steps can yield answers to the task requirement
   - Whether task steps can be executed
   - Whether task steps can properly match and call tools
 3. Function Call Score: Function Call evaluation
@@ -34,9 +34,9 @@ Grading is based on task requirements and task answers. Key scoring elements inc…
     "score": 0.62, # value: 0 ~ 1.00 , using two decimal places
     "reasons": "Evaluation and reasons for deduction regarding task results"
   },
-  "…
+  "task_plan": { # Task Plan Score
     "score": 0.53, # value: 0 ~ 1.00 , using two decimal places
-    "reasons": "Evaluation and reasons for deduction regarding the task…
+    "reasons": "Evaluation and reasons for deduction regarding the task planning"
   },
   "function_call": { # Function Call Score
     "score": 0.41, # value: 0 ~ 1.00 , using two decimal places
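Putting the template's output contract next to the agent code: the evaluator is expected to return one JSON object with task_result, task_plan (new in 0.2.0), and function_call sections, each carrying a score from 0 to 1.00 and a reasons string, and the ReactAgent reads the scores back out of it. A small sketch of both sides, with invented scores and reasons for illustration:

# Shape that result_eval_template.txt asks the LLM to produce (values are made up here).
eval_result = {
    "task_result":   {"score": 0.88, "reasons": "Answer matches the task requirement."},
    "task_plan":     {"score": 0.53, "reasons": "Plan skipped a verification step."},   # new section in 0.2.0
    "function_call": {"score": 0.91, "reasons": "Tools were called with correct parameters."},
}

# How the agent side reads it back (mirrors result_eval_agent / react_agent in this diff).
result_score = eval_result.get('task_result', {}).get('score', -1)
plan_score = eval_result.get('task_plan', {}).get('score', -1)
function_score = eval_result.get('function_call', {}).get('score', -1)
print(result_score, plan_score, function_score)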
@@ -53,9 +53,14 @@ Grading is based on task requirements and task answers. Key scoring elements inc…
 {task_result}
 
 
+# Task Plan
+{task_plan}
+
+
 # LLM Procedure
 {llm_process}
 
+
 # Function Call Procedure
 {function_process}
 
{xgae-0.1.20 → xgae-0.2.0}/templates/general_tool_prompt_template.txt

@@ -22,4 +22,5 @@ When using the tools:
 - Include all required parameters as specified in the schema
 - Format complex data (objects, arrays) as JSON strings within the parameter tags
 - Boolean values should be "true" or "false" (lowercase)
+- If 'ask' tool answer is not match, call 'complete' tool end task, never call 'ask' tool again
 {tool_examples}