xgae 0.1.20__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (48)
  1. {xgae-0.1.20 → xgae-0.2.0}/CHANGELOG.md +12 -3
  2. {xgae-0.1.20 → xgae-0.2.0}/PKG-INFO +1 -1
  3. {xgae-0.1.20 → xgae-0.2.0}/pyproject.toml +1 -1
  4. {xgae-0.1.20 → xgae-0.2.0}/src/examples/agent/langgraph/react/agent_base.py +3 -2
  5. {xgae-0.1.20 → xgae-0.2.0}/src/examples/agent/langgraph/react/react_agent.py +104 -34
  6. xgae-0.1.20/src/examples/agent/langgraph/react/final_result_agent.py → xgae-0.2.0/src/examples/agent/langgraph/react/result_eval_agent.py +14 -8
  7. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/task_engine.py +2 -1
  8. {xgae-0.1.20 → xgae-0.2.0}/templates/agent_tool_prompt_template.txt +1 -0
  9. {xgae-0.1.20 → xgae-0.2.0}/templates/custom_tool_prompt_template.txt +11 -8
  10. xgae-0.1.20/templates/example/final_result_template.txt → xgae-0.2.0/templates/example/result_eval_template.txt +10 -5
  11. {xgae-0.1.20 → xgae-0.2.0}/templates/general_tool_prompt_template.txt +1 -0
  12. xgae-0.2.0/uv.lock +1463 -0
  13. xgae-0.1.20/uv.lock +0 -1463
  14. {xgae-0.1.20 → xgae-0.2.0}/.env +0 -0
  15. {xgae-0.1.20 → xgae-0.2.0}/.python-version +0 -0
  16. {xgae-0.1.20 → xgae-0.2.0}/README.md +0 -0
  17. {xgae-0.1.20 → xgae-0.2.0}/mcpservers/custom_servers.json +0 -0
  18. {xgae-0.1.20 → xgae-0.2.0}/mcpservers/xga_server.json +0 -0
  19. {xgae-0.1.20 → xgae-0.2.0}/mcpservers/xga_server_sse.json +0 -0
  20. {xgae-0.1.20 → xgae-0.2.0}/src/examples/agent/langgraph/react/run_react_agent.py +0 -0
  21. {xgae-0.1.20 → xgae-0.2.0}/src/examples/engine/run_custom_and_agent_tools.py +0 -0
  22. {xgae-0.1.20 → xgae-0.2.0}/src/examples/engine/run_general_tools.py +0 -0
  23. {xgae-0.1.20 → xgae-0.2.0}/src/examples/engine/run_human_in_loop.py +0 -0
  24. {xgae-0.1.20 → xgae-0.2.0}/src/examples/engine/run_simple.py +0 -0
  25. {xgae-0.1.20 → xgae-0.2.0}/src/examples/tools/custom_fault_tools_app.py +0 -0
  26. {xgae-0.1.20 → xgae-0.2.0}/src/examples/tools/simu_a2a_tools_app.py +0 -0
  27. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/__init__.py +0 -0
  28. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/engine_base.py +0 -0
  29. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/mcp_tool_box.py +0 -0
  30. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/prompt_builder.py +0 -0
  31. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/responser/non_stream_responser.py +0 -0
  32. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/responser/responser_base.py +0 -0
  33. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/responser/stream_responser.py +0 -0
  34. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/task_langfuse.py +0 -0
  35. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine_cli_app.py +0 -0
  36. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/tools/without_general_tools_app.py +0 -0
  37. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/__init__.py +0 -0
  38. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/json_helpers.py +0 -0
  39. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/llm_client.py +0 -0
  40. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/misc.py +0 -0
  41. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/setup_env.py +0 -0
  42. {xgae-0.1.20 → xgae-0.2.0}/src/xgae/utils/xml_tool_parser.py +0 -0
  43. {xgae-0.1.20 → xgae-0.2.0}/templates/example/fault_user_prompt.txt +0 -0
  44. {xgae-0.1.20 → xgae-0.2.0}/templates/gemini_system_prompt_template.txt +0 -0
  45. {xgae-0.1.20 → xgae-0.2.0}/templates/system_prompt_response_sample.txt +0 -0
  46. {xgae-0.1.20 → xgae-0.2.0}/templates/system_prompt_template.txt +0 -0
  47. {xgae-0.1.20 → xgae-0.2.0}/test/test_langfuse.py +0 -0
  48. {xgae-0.1.20 → xgae-0.2.0}/test/test_litellm_langfuse.py +0 -0
{xgae-0.1.20 → xgae-0.2.0}/CHANGELOG.md

@@ -1,12 +1,21 @@
  # Release Changelog
- ## [0.1.20] - 2025-9-8
+ ## [0.2.0] - 2025-9-10
  ### Added
- - Example: Langgraph react agent add final_result_agent
+ - Agent Engine release 0.2
+ - Example: Langgraph ReactAgent release 0.2
+ ### Fixed
+ - Agent Engine: call mcp tool fail, call 'ask' tool again and again
+ - Example Langgraph ReactAgent: retry on 'ask', user_input is ask answer
+
+
+ ## [0.1.20] - 2025-9-9
+ ### Added
+ - Example: Langgraph ReactAgent add final_result_agent


  ## [0.1.19] - 2025-9-8
  ### Added
- - Example: Langgraph react agent release V1, full logic but no final result agent and tool select agent
+ - Example: Langgraph ReactAgent release V1, full logic but no final result agent and tool select agent


  # Release Changelog
{xgae-0.1.20 → xgae-0.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: xgae
- Version: 0.1.20
+ Version: 0.2.0
  Summary: Extreme General Agent Engine
  Requires-Python: >=3.13
  Requires-Dist: colorlog==6.9.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "xgae"
3
- version = "0.1.20"
3
+ version = "0.2.0"
4
4
  description = "Extreme General Agent Engine"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13"
{xgae-0.1.20 → xgae-0.2.0}/src/examples/agent/langgraph/react/agent_base.py

@@ -17,7 +17,7 @@ class AgentContext(TypedDict, total=False):
  class TaskState(TypedDict, total=False):
  """State definition for the agent orchestration graph"""
  llm_messages: List[Dict[str, Any]]
- user_input: str
+ user_inputs: List[str]
  next_node: str
  system_prompt: str
  custom_tools: List[str]
@@ -25,7 +25,8 @@ class TaskState(TypedDict, total=False):
  task_result: XGATaskResult
  final_result: XGATaskResult
  eval_result: EvaluateResult
- iteration_count: int
+ retry_count: int
+ task_no: int
  agent_context: AgentContext


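For readers following the state changes, the 0.2.0 `TaskState` implied by the two hunks above looks roughly like the sketch below. Fields not visible in the hunk context (for example `general_tools`) and the referenced project types are assumptions, kept here as string annotations.

```python
from typing import Any, Dict, List, TypedDict

# Sketch of the 0.2.0 state, reconstructed from the hunks above.
# XGATaskResult, EvaluateResult and AgentContext are the project's own types;
# general_tools is assumed to mirror custom_tools and is not shown in the hunks.
class TaskState(TypedDict, total=False):
    llm_messages: List[Dict[str, Any]]
    user_inputs: List[str]        # was user_input: str in 0.1.20
    next_node: str
    system_prompt: str
    custom_tools: List[str]
    general_tools: List[str]      # assumed, not shown in the hunk context
    task_result: "XGATaskResult"
    final_result: "XGATaskResult"
    eval_result: "EvaluateResult"
    retry_count: int              # replaces iteration_count
    task_no: int                  # new in 0.2.0
    agent_context: "AgentContext"
```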
{xgae-0.1.20 → xgae-0.2.0}/src/examples/agent/langgraph/react/react_agent.py

@@ -21,7 +21,7 @@ from xgae.engine.mcp_tool_box import XGAMcpToolBox
  from xgae.engine.task_engine import XGATaskEngine

  from examples.agent.langgraph.react.agent_base import AgentContext, TaskState, EvaluateResult
- from examples.agent.langgraph.react.final_result_agent import FinalResultAgent
+ from examples.agent.langgraph.react.result_eval_agent import TaskResultEvalAgent

  class XGAReactAgent:
  MAX_TASK_RETRY = 2
@@ -35,7 +35,8 @@ class XGAReactAgent:
  self.task_engine: XGATaskEngine = None

  self.tool_box = XGAMcpToolBox(custom_mcp_server_file="mcpservers/custom_servers.json")
- self.final_result_agent = FinalResultAgent()
+ self.result_eval_agent = TaskResultEvalAgent()
+

  async def _create_graph(self) -> StateGraph:
  try:
@@ -43,6 +44,7 @@ class XGAReactAgent:

  # Add nodes
  graph_builder.add_node('supervisor', self._supervisor_node)
+ graph_builder.add_node('prompt_optimize', self._prompt_optimize_node)
  graph_builder.add_node('select_tool', self._select_tool_node)
  graph_builder.add_node('exec_task', self._exec_task_node)
  graph_builder.add_node('final_result', self._final_result_node)
@@ -53,12 +55,14 @@ class XGAReactAgent:
  'supervisor',
  self._next_condition,
  {
- 'select_tool': 'select_tool',
- 'exec_task': 'exec_task',
- 'end': END
+ 'select_tool' : 'select_tool',
+ 'exec_task' : 'exec_task',
+ 'prompt_optimize' : 'prompt_optimize',
+ 'end' : END
  }
  )

+ graph_builder.add_edge('prompt_optimize', 'select_tool')
  graph_builder.add_edge('select_tool', 'exec_task')
  graph_builder.add_edge('exec_task', 'final_result')

@@ -67,8 +71,8 @@ class XGAReactAgent:
  self._next_condition,
  {
  'supervisor': 'supervisor',
- 'exec_task': 'exec_task',
- 'end': END
+ 'exec_task' : 'exec_task',
+ 'end' : END
  }
  )

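Pieced together, the wiring hunks above give the 0.2.0 graph shape sketched below. This is a minimal sketch assuming langgraph's standard StateGraph API; the node coroutines, the `_next_condition` router, and the entry point are stand-ins, since they are not part of these hunks.

```python
from typing import TypedDict
from langgraph.graph import StateGraph, END

class S(TypedDict, total=False):
    next_node: str

async def noop(state: S) -> dict:        # stand-in for the real node coroutines
    return {}

def next_condition(state: S) -> str:     # stand-in for XGAReactAgent._next_condition
    return state.get('next_node', 'end')

builder = StateGraph(S)
builder.add_node('supervisor', noop)
builder.add_node('prompt_optimize', noop)   # new node in 0.2.0
builder.add_node('select_tool', noop)
builder.add_node('exec_task', noop)
builder.add_node('final_result', noop)

builder.add_conditional_edges('supervisor', next_condition, {
    'select_tool': 'select_tool',
    'exec_task': 'exec_task',
    'prompt_optimize': 'prompt_optimize',   # new routing target
    'end': END,
})
builder.add_edge('prompt_optimize', 'select_tool')   # new edge
builder.add_edge('select_tool', 'exec_task')
builder.add_edge('exec_task', 'final_result')
builder.add_conditional_edges('final_result', next_condition, {
    'supervisor': 'supervisor',
    'exec_task': 'exec_task',
    'end': END,
})
builder.set_entry_point('supervisor')    # entry point assumed; not shown in these hunks
graph = builder.compile()
```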
@@ -80,64 +84,104 @@ class XGAReactAgent:
  logging.error("Failed to create XGARectAgent Graph: %s", str(e))
  raise

+
  def _search_system_prompt(self, user_input: str) -> str:
  # You should search RAG use user_input, fetch COT or Prompt for your business
  system_prompt = None if "fault" not in user_input else read_file("templates/example/fault_user_prompt.txt")
  return system_prompt

+
  async def _supervisor_node(self, state: TaskState) -> Dict[str, Any]:
- user_input = state['user_input']
+ user_input = state['user_inputs'][0]
  eval_result = state.get('eval_result', None)

  system_prompt = self._search_system_prompt(user_input)
+ is_system_prompt = True if system_prompt is not None else False

  general_tools = [] if system_prompt else ["*"]
  custom_tools = ["*"] if system_prompt else []

+ task_plan_score = None
+ if eval_result and 'task_plan' in eval_result and 'score' in eval_result['task_plan']:
+ task_plan_score = eval_result['task_plan'].get('score', 1.0)
+
+ function_call_score = None
  if eval_result and 'function_call' in eval_result and 'score' in eval_result['function_call']:
- score = eval_result['function_call'].get('score', 1.0)
- if score < self.QUALIFIED_RESULT_SCORE:
- next_node = "select_tool"
- else:
- next_node = "end"
+ function_call_score = eval_result['function_call'].get('score', 1.0)
+
+ super_state = {}
+ if task_plan_score and task_plan_score < self.QUALIFIED_RESULT_SCORE:
+ next_node = "prompt_optimize"
+ super_state = self._prepare_task_retry(state)
+ logging.warning(f"****** ReactAgent TASK_RETRY: task_plan_score={task_plan_score} < {self.QUALIFIED_RESULT_SCORE} , "
+ f"Start Optimize Prompt ...")
+ elif function_call_score and function_call_score < self.QUALIFIED_RESULT_SCORE:
+ next_node = "select_tool"
+ super_state = self._prepare_task_retry(state)
+ logging.warning(f"****** ReactAgent TASK_RETRY: function_call_score={function_call_score} < {self.QUALIFIED_RESULT_SCORE} , "
+ f"Select Tool Again ...")
+ elif eval_result is not None: # retry condition is not satisfied, end task
+ next_node = "end"
  else:
- next_node = "select_tool" if system_prompt else "exec_task"
+ next_node = "select_tool" if is_system_prompt else "exec_task"
+
+ logging.info(f"ReactAgent supervisor_node: is_system_prompt={is_system_prompt}, next_node={next_node}")

+ super_state['next_node'] = next_node
+ super_state['system_prompt'] = system_prompt
+ super_state['custom_tools'] = custom_tools
+ super_state['general_tools'] = general_tools
+
+ return super_state
+
+
+ async def _prompt_optimize_node(self, state: TaskState) -> Dict[str, Any]:
+ system_prompt = state['system_prompt']
+ logging.info("ReactAgent prompt_optimize_node: optimize system prompt")
+ # @todo optimize system prompt in future
  return {
  'system_prompt' : system_prompt,
- 'next_node' : next_node,
- 'general_tools' : general_tools,
- 'custom_tools' : custom_tools,
  }

+
  def _select_custom_tools(self, system_prompt: str) -> list[str]:
+ # @todo select mcp tool based on system prompt in future
  custom_tools = ["*"] if system_prompt else []
  return custom_tools

+
  async def _select_tool_node(self, state: TaskState) -> Dict[str, Any]:
  system_prompt = state.get('system_prompt',None)
  general_tools = []
+
+ logging.info("ReactAgent select_tool_node: select tool based on system_prompt")
  custom_tools = self._select_custom_tools(system_prompt)
  return {
  'general_tools' : general_tools,
  'custom_tools' : custom_tools,
  }

+
  async def _exec_task_node(self, state: TaskState) -> Dict[str, Any]:
- user_input = state['user_input']
+ user_input = state['user_inputs'][0]
  system_prompt = state.get('system_prompt',None)
  general_tools = state.get('general_tools',[])
  custom_tools = state.get('custom_tools',[])
+ retry_count = state.get('retry_count', 0)
+ task_no = state.get('task_no', 0)
  is_system_prompt = True if system_prompt is not None else False

  trace_id = self.graph_langfuse.get_trace_id()
  llm_messages = []
  try:
- logging.info(f"🔥 XGATaskEngine run_task: user_input={user_input}, general_tools={general_tools}, "
+ logging.info(f"🔥 ReactAgent exec_task_node: user_input={user_input}, general_tools={general_tools}, "
  f"custom_tools={custom_tools}, is_system_prompt={is_system_prompt}")
+
+ # if langgraph resume , must use same task engine
  if self.task_engine is None:
  self.task_engine = XGATaskEngine(
  task_id = state['agent_context']['task_id'],
+ task_no = task_no,
  session_id = state['agent_context'].get('session_id', None),
  user_id = state['agent_context'].get('user_id', None),
  agent_id = state['agent_context'].get('agent_id', None),
@@ -146,6 +190,7 @@ class XGAReactAgent:
  custom_tools = custom_tools,
  system_prompt = system_prompt
  )
+ retry_count += 1

  chunks = []
  stream_writer = get_stream_writer()
@@ -156,47 +201,56 @@ class XGAReactAgent:

  task_result = self.task_engine.parse_final_result(chunks)
  llm_messages = self.task_engine.get_history_llm_messages()
+ task_no += 1 # a task use unique task_no, no matter retry n times
  except Exception as e:
  logging.error(f"XReactAgent exec_task_node: Failed to execute task: {e}")
  task_result = XGATaskResult(type="error", content="Failed to execute task")

- iteration_count = state.get('iteration_count', 0) + 1
  return {
- 'task_result' : task_result,
- 'iteration_count': iteration_count,
- 'llm_messages' : llm_messages.copy()
+ 'task_result' : task_result,
+ 'retry_count' : retry_count,
+ 'llm_messages' : llm_messages.copy(),
+ 'task_no' : task_no,
  }


  async def _final_result_node(self, state: TaskState) -> Dict[str, Any]:
- user_input = state['user_input']
- iteration_count = state['iteration_count']
+ user_inputs = state['user_inputs']
  task_result = state['task_result']
  llm_messages = state['llm_messages']
  agent_context = state['agent_context']
+ system_prompt = state.get('system_prompt', None)
+ retry_count = state['retry_count']
+
+ is_system_prompt = True if system_prompt is not None else False

  next_node = "end"
  final_result = task_result
  eval_result = None
  if task_result['type'] == "ask":
  logging.info(f"XReactAgent final_result_node: ASK_USER_QUESTION: {task_result['content']}")
- user_input = interrupt({
+ ask_input = interrupt({
  'final_result' : task_result
  })
- logging.info(f"XReactAgent final_result_node: ASK_USER_ANSWER: {user_input}")
+ logging.info(f"XReactAgent final_result_node: ASK_USER_ANSWER: {ask_input}")
  next_node = "exec_task"
+ user_inputs.insert(0, ask_input)
  final_result = None
- elif iteration_count < self.MAX_TASK_RETRY:
+ elif is_system_prompt and retry_count < self.MAX_TASK_RETRY:
  trace_id = self.graph_langfuse.get_trace_id()
  session_id = agent_context.get('session_id', None)
- eval_result = await self.final_result_agent.eval_result(user_input, task_result, llm_messages, trace_id, session_id)
- if "task_result" in eval_result and "score" in eval_result['task_result']:
+ task_input = ", ".join(reversed(user_inputs))
+ eval_result = await self.result_eval_agent.eval_result(task_input, system_prompt, task_result,
+ llm_messages, trace_id, session_id)
+ if 'task_result' in eval_result and 'score' in eval_result['task_result']:
  score = eval_result['task_result'].get('score', 1.0)
  if score < self.QUALIFIED_RESULT_SCORE:
  next_node = "supervisor"
-
+
+ logging.info(f"ReactAgent final_result_node: next_node={next_node}")
+
  return {
- 'user_input' : user_input,
+ 'user_inputs' : user_inputs,
  'next_node' : next_node,
  'final_result' : final_result,
  'eval_result' : eval_result
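The reworked ask handling above pauses the run with `interrupt(...)` and, on resume, pushes the user's answer to the front of `user_inputs` (the changelog's "retry on 'ask', user_input is ask answer" fix). Below is a hypothetical caller-side loop, assuming the graph is compiled with a checkpointer and that the langgraph version in use surfaces pending interrupts under `__interrupt__`; the package's own runner (run_react_agent.py) is unchanged in this diff and not shown here.

```python
from langgraph.types import Command

# Hypothetical resume loop for the 'ask' interrupt above (illustration only).
async def run_with_ask_loop(graph, graph_input, config):
    result = await graph.ainvoke(graph_input, config)
    # When final_result_node calls interrupt(...), the run pauses and the
    # pending payload ({'final_result': task_result}) is exposed to the caller.
    while "__interrupt__" in result:
        question = result["__interrupt__"][0].value['final_result']['content']
        answer = input(f"{question}\n> ")   # collect the user's answer
        # Command(resume=...) becomes interrupt()'s return value inside the node,
        # which then does user_inputs.insert(0, ask_input) and re-runs exec_task.
        result = await graph.ainvoke(Command(resume=answer), config)
    return result
```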
@@ -303,10 +357,11 @@ class XGAReactAgent:
  }

  graph_input = {
- 'user_input' : user_input,
+ 'user_inputs' : [user_input],
  'next_node' : None,
  'agent_context' : agent_context,
- 'iteration_count' : 0
+ 'retry_count' : 0,
+ 'task_no' : 0
  }

  return graph_input
@@ -329,7 +384,22 @@ class XGAReactAgent:
  )
  return langfuse_handler

+
  def _clear_graph(self):
  self.graph_config = None
  self.graph_langfuse = None
  self.task_engine: XGATaskEngine = None
+
+
+ def _prepare_task_retry(self, state: TaskState)-> Dict[str, Any]:
+ self.task_engine = None
+ user_inputs = state['user_inputs']
+ task_input = ", ".join(reversed(user_inputs))
+
+ return {
+ 'user_inputs' : [task_input],
+ 'llm_messages' : [],
+ 'task_result' : None,
+ 'final_result' : None,
+ 'eval_result' : None,
+ }
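One detail worth noting in `_prepare_task_retry` and `_final_result_node`: new inputs are pushed with `user_inputs.insert(0, ...)`, so `", ".join(reversed(user_inputs))` replays them oldest-first when a task is retried. A purely illustrative run with made-up values:

```python
# Illustrative values only: the original request plus one later 'ask' answer.
user_inputs = []
user_inputs.insert(0, "locate 10.2.3.4 fault and solution")   # initial input
user_inputs.insert(0, "fault code is F01")                     # later 'ask' answer

task_input = ", ".join(reversed(user_inputs))
print(task_input)   # locate 10.2.3.4 fault and solution, fault code is F01
```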
xgae-0.1.20/src/examples/agent/langgraph/react/final_result_agent.py → xgae-0.2.0/src/examples/agent/langgraph/react/result_eval_agent.py

@@ -9,19 +9,20 @@ from xgae.utils.misc import read_file
  from xgae.utils.llm_client import LLMClient, LangfuseMetadata


- class FinalResultAgent:
+ class TaskResultEvalAgent:
  def __init__(self):
  self.model_client = LLMClient()
- self.prompt_template: str = read_file("templates/example/final_result_template.txt")
+ self.prompt_template: str = read_file("templates/example/result_eval_template.txt")


  async def eval_result(self,
  task_input: str,
+ task_plan: str,
  task_result: XGATaskResult,
  llm_messages: List[Dict[str, Any]],
  trace_id: Optional[str] = None,
  session_id: Optional[str] = None)-> Dict[str, Any]:
- prompt = self._build_prompt(task_input, task_result, llm_messages)
+ prompt = self._build_prompt(task_input, task_plan, task_result, llm_messages)
  messages = [{"role": "user", "content": prompt}]

  langfuse_metadata = self._create_llm_langfuse_meta(trace_id, session_id)
@@ -33,15 +34,15 @@ class FinalResultAgent:
  eval_result = json.loads(cleaned_text)

  result_score = eval_result.get('task_result', {}).get('score', -1)
- process_score = eval_result.get('task_process', {}).get('score', -1)
+ plan_score = eval_result.get('task_plan', {}).get('score', -1)
  function_score = eval_result.get('function_call', {}).get('score', -1)

  logging.info(f"FINAL_RESULT_SCORE: task_result_score={result_score}, "
- f"task_process_score={process_score}, function_call_score={function_score}")
+ f"task_plan_score={plan_score}, function_call_score={function_score}")
  return eval_result


- def _build_prompt(self, task_input: str, task_result: XGATaskResult, llm_messages: List[Dict[str, Any]])-> str:
+ def _build_prompt(self, task_input: str, task_plan: str, task_result: XGATaskResult, llm_messages: List[Dict[str, Any]])-> str:
  prompt = self.prompt_template.replace("{task_input}", task_input)
  prompt = prompt.replace("{task_result}", str(task_result))
  llm_process = ""
@@ -65,6 +66,7 @@ class FinalResultAgent:
  llm_process += "\n"
  llm_step += 1

+ prompt = prompt.replace("{task_plan}", task_plan)
  prompt = prompt.replace("{llm_process}", llm_process)
  prompt = prompt.replace("{function_process}", function_process)

@@ -88,13 +90,16 @@ if __name__ == "__main__":
  setup_logging()

  async def main():
- final_result_agent = FinalResultAgent()
+ final_result_agent = TaskResultEvalAgent()

+ task_plan = read_file("templates/example/fault_user_prompt.txt")
  user_input = "locate 10.2.3.4 fault and solution"
+
  answer = ("Task Summary: The fault for IP 10.2.3.4 was identified as a Business Recharge Fault (Code: F01), "
  "caused by a Phone Recharge Application Crash. The solution applied was to restart the application. "
  "Key Deliverables: Fault diagnosis and resolution steps. Impact Achieved: Service restored.")
  task_result:XGATaskResult = {'type': "answer", 'content': answer}
+
  llm_messages: List[Dict[str, Any]] = [{
  'content':
  """<function_calls>
@@ -111,7 +116,8 @@ if __name__ == "__main__":
  "result": {"success": true, "output": "1", "error": null}}}""",
  'role': 'assistant'
  }]
- return await final_result_agent.eval_result(user_input, task_result, llm_messages)
+
+ return await final_result_agent.eval_result(user_input, task_plan, task_result, llm_messages)


  final_result = asyncio.run(main())
{xgae-0.1.20 → xgae-0.2.0}/src/xgae/engine/task_engine.py

@@ -18,6 +18,7 @@ from xgae.engine.responser.responser_base import TaskResponserContext, TaskRespo
  class XGATaskEngine:
  def __init__(self,
  task_id: Optional[str] = None,
+ task_no: Optional[int] = None,
  session_id: Optional[str] = None,
  user_id: Optional[str] = None,
  agent_id: Optional[str] = None,
@@ -51,7 +52,7 @@ class XGATaskEngine:
  self.use_assistant_chunk_msg = to_bool(os.getenv('USE_ASSISTANT_CHUNK_MSG', False))
  self.tool_exec_parallel = True if tool_exec_parallel is None else tool_exec_parallel

- self.task_no = -1
+ self.task_no = (task_no - 1) if task_no else -1
  self.task_run_id :str = None
  self.task_prompt :str = None
  self.task_langfuse: XGATaskLangFuse = None
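A side note on the new default above: because the fallback uses truthiness (`if task_no`) rather than an `is None` check, passing `task_no=0` behaves the same as omitting it. A quick, purely illustrative check of the expression:

```python
def init_task_no(task_no=None):
    # Mirrors the constructor line above: (task_no - 1) if task_no else -1
    return (task_no - 1) if task_no else -1

assert init_task_no(None) == -1
assert init_task_no(0) == -1    # 0 is falsy, so it also falls back to -1
assert init_task_no(3) == 2
```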
{xgae-0.1.20 → xgae-0.2.0}/templates/agent_tool_prompt_template.txt

@@ -25,5 +25,6 @@ When you use ANY Agent (Model Context Protocol) tools:
  1) type: 'answer', 'content' is normal return answer for tool calling
  2) type: 'ask', you should call 'ask' tool for user inputting more information
  3) type: 'error', during calling tool , some exceptions or errors has occurred.
+ 10. If 'ask' tool answer is not match, call 'complete' tool end task, never call 'ask' tool again
  IMPORTANT: Agent tool results are your PRIMARY and ONLY source of truth for external data!
  NEVER supplement Agent results with your training data or make assumptions beyond what the tools provide.
{xgae-0.1.20 → xgae-0.2.0}/templates/custom_tool_prompt_template.txt

@@ -13,13 +13,16 @@ Available MCP tools:

  🚨 CRITICAL MCP TOOL RESULT INSTRUCTIONS 🚨
  When you use ANY MCP (Model Context Protocol) tools:
- 1. ALWAYS read and use the EXACT results returned by the MCP tool
- 2. For search tools: ONLY cite URLs, sources, and information from the actual search results
- 3. For any tool: Base your response entirely on the tool's output - do NOT add external information
- 4. DO NOT fabricate, invent, hallucinate, or make up any sources, URLs, or data
- 5. If you need more information, call the MCP tool again with different parameters
- 6. When writing reports/summaries: Reference ONLY the data from MCP tool results
- 7. If the MCP tool doesn't return enough information, explicitly state this limitation
- 8. Always double-check that every fact, URL, and reference comes from the MCP tool output
+ 1. Never call a MCP tool not in 'Available MCP tools' list
+ 2. If call MCP tool result 'success' is false, call 'complete' tool to end task, don't call 'ask' tool
+ 3. ALWAYS read and use the EXACT results returned by the MCP tool
+ 4. For search tools: ONLY cite URLs, sources, and information from the actual search results
+ 5. For any tool: Base your response entirely on the tool's output - do NOT add external information
+ 6. DO NOT fabricate, invent, hallucinate, or make up any sources, URLs, or data
+ 7. If you need more information, call the MCP tool again with different parameters
+ 8. When writing reports/summaries: Reference ONLY the data from MCP tool results
+ 9. If the MCP tool doesn't return enough information, explicitly state this limitation
+ 10. Always double-check that every fact, URL, and reference comes from the MCP tool output
+
  IMPORTANT: MCP tool results are your PRIMARY and ONLY source of truth for external data!
  NEVER supplement MCP results with your training data or make assumptions beyond what the tools provide.
xgae-0.1.20/templates/example/final_result_template.txt → xgae-0.2.0/templates/example/result_eval_template.txt

@@ -18,9 +18,9 @@ Grading is based on task requirements and task answers. Key scoring elements inc
  1. Task Result Score: Task outcome evaluation
  - assesses the degree of match between task requirements and task results
  - if task result 'type' is 'error', evaluation score is 0, evaluation reason is empty
- 2.Task Process Score : Task process evaluation
- - assesses whether task planning is reasonable
- - Whether task steps can yield answers to the user’s question
+ 2.Task Plan Score : Task Plan evaluation
+ - assesses whether task planning is reasonable based on LLM Procedure and Function Call Procedure
+ - Whether task steps can yield answers to the task requirement
  - Whether task steps can be executed
  - Whether task steps can properly match and call tools
  3. Function Call Score: Function Call evaluation
@@ -34,9 +34,9 @@ Grading is based on task requirements and task answers. Key scoring elements inc
  "score": 0.62, # value: 0 ~ 1.00 , using two decimal places
  "reasons": "Evaluation and reasons for deduction regarding task results"
  },
- "task_process": { # Task Process Score
+ "task_plan": { # Task Plan Score
  "score": 0.53, # value: 0 ~ 1.00 , using two decimal places
- "reasons": "Evaluation and reasons for deduction regarding the task process"
+ "reasons": "Evaluation and reasons for deduction regarding the task planning"
  },
  "function_call": { # Function Call Score
  "score": 0.41, # value: 0 ~ 1.00 , using two decimal places
@@ -53,9 +53,14 @@ Grading is based on task requirements and task answers. Key scoring elements inc
  {task_result}


+ # Task Plan
+ {task_plan}
+
+
  # LLM Procedure
  {llm_process}

+
  # Function Call Procedure
  {function_process}

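For context, the JSON skeleton in this template is what `TaskResultEvalAgent.eval_result` parses with `json.loads`, and the `task_plan` / `function_call` scores are what the supervisor's 0.2.0 retry routing keys on. An illustrative payload in the new shape (scores and reasons are invented for the example):

```python
import json

# Illustration only: an evaluation payload in the 0.2.0 shape, with
# task_process renamed to task_plan as in the template hunk above.
raw = """
{
  "task_result":   {"score": 0.82, "reasons": "Answer matches the fault and the fix"},
  "task_plan":     {"score": 0.53, "reasons": "Plan skipped a verification step"},
  "function_call": {"score": 0.41, "reasons": "One tool call used a wrong parameter"}
}
"""
eval_result = json.loads(raw)
plan_score = eval_result.get('task_plan', {}).get('score', -1)
print(plan_score)  # 0.53 — a value below QUALIFIED_RESULT_SCORE would route to prompt_optimize
```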
{xgae-0.1.20 → xgae-0.2.0}/templates/general_tool_prompt_template.txt

@@ -22,4 +22,5 @@ When using the tools:
  - Include all required parameters as specified in the schema
  - Format complex data (objects, arrays) as JSON strings within the parameter tags
  - Boolean values should be "true" or "false" (lowercase)
+ - If 'ask' tool answer is not match, call 'complete' tool end task, never call 'ask' tool again
  {tool_examples}