xgae 0.1.9__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xgae might be problematic.
- xgae/__init__.py +4 -0
- xgae/cli_app.py +85 -0
- xgae/engine/engine_base.py +3 -3
- xgae/engine/mcp_tool_box.py +4 -4
- xgae/engine/responser/non_stream_responser.py +33 -38
- xgae/engine/responser/responser_base.py +42 -40
- xgae/engine/responser/stream_responser.py +95 -782
- xgae/engine/task_engine.py +98 -50
- xgae/engine/task_langfuse.py +8 -6
- xgae/tools/without_general_tools_app.py +2 -3
- xgae/utils/__init__.py +2 -2
- xgae/utils/json_helpers.py +2 -2
- xgae/utils/llm_client.py +42 -32
- xgae/utils/setup_env.py +4 -3
- {xgae-0.1.9.dist-info → xgae-0.1.12.dist-info}/METADATA +1 -1
- xgae-0.1.12.dist-info/RECORD +21 -0
- {xgae-0.1.9.dist-info → xgae-0.1.12.dist-info}/entry_points.txt +1 -0
- xgae-0.1.9.dist-info/RECORD +0 -20
- {xgae-0.1.9.dist-info → xgae-0.1.12.dist-info}/WHEEL +0 -0
xgae/engine/task_engine.py
CHANGED
@@ -5,7 +5,7 @@ import os
  from typing import List, Any, Dict, Optional, AsyncGenerator, Union, Literal
  from uuid import uuid4

- from xgae.utils import handle_error
+ from xgae.utils import handle_error, to_bool
  from xgae.utils.llm_client import LLMClient, LLMConfig
  from xgae.utils.json_helpers import format_for_yield

@@ -46,6 +46,8 @@ class XGATaskEngine:

  max_auto_run = max_auto_run if max_auto_run else int(os.getenv("MAX_AUTO_RUN", 15))
  self.max_auto_run: int = 1 if max_auto_run <= 1 else max_auto_run
+
+ self.use_assistant_chunk_msg = to_bool(os.getenv("USE_ASSISTANT_CHUNK_MSG", False))
  self.tool_exec_parallel = True if tool_exec_parallel is None else tool_exec_parallel

  self.task_no = -1
@@ -67,7 +69,7 @@ class XGATaskEngine:
  chunks.append(chunk)

  if len(chunks) > 0:
- final_result = self.
+ final_result = self.parse_final_result(chunks)
  else:
  final_result = XGATaskResult(type="error", content="LLM Answer is Empty")

@@ -117,22 +119,22 @@ class XGATaskEngine:

  self.task_prompt = self.prompt_builder.build_task_prompt(self.model_name, general_tool_schemas, custom_tool_schemas)

- logging.info("*" *
+ logging.info("*" * 10 + f" XGATaskEngine Task'{self.task_id}' Initialized " + "*" * 10)
  logging.info(f"model_name={self.model_name}, is_stream={self.is_stream}")
  logging.info(f"general_tools={general_tools}, custom_tools={custom_tools}")


  async def _run_task_auto(self) -> AsyncGenerator[Dict[str, Any], None]:
- def update_continuous_state(_auto_continue_count, _auto_continue):
- continuous_state["auto_continue_count"] = _auto_continue_count
- continuous_state["auto_continue"] = _auto_continue
-
  continuous_state: TaskRunContinuousState = {
  "accumulated_content": "",
  "auto_continue_count": 0,
  "auto_continue": False if self.max_auto_run <= 1 else True
  }

+ def update_continuous_state(_auto_continue_count, _auto_continue):
+ continuous_state["auto_continue_count"] = _auto_continue_count
+ continuous_state["auto_continue"] = _auto_continue
+
  auto_continue_count = 0
  auto_continue = True
  while auto_continue and auto_continue_count < self.max_auto_run:
@@ -146,34 +148,45 @@ class XGATaskEngine:
  content = json.loads(chunk.get('content', '{}'))
  status_type = content.get('status_type', None)
  if status_type == "error":
- logging.error(f"run_task_auto: task_response error: {chunk.get('message', 'Unknown error')}")
+ logging.error(f"TaskEngine run_task_auto: task_response error: {chunk.get('message', 'Unknown error')}")
  auto_continue = False
  break
  elif status_type == 'finish':
  finish_reason = content.get('finish_reason', None)
  if finish_reason == 'completed':
- logging.info(f"run_task_auto: Detected finish_reason='completed', TASK_COMPLETE Success !")
+ logging.info(f"TaskEngine run_task_auto: Detected finish_reason='completed', TASK_COMPLETE Success !")
  auto_continue = False
  break
  elif finish_reason == 'xml_tool_limit_reached':
- logging.warning(f"run_task_auto: Detected finish_reason='xml_tool_limit_reached', stop auto-continue")
+ logging.warning(f"TaskEngine run_task_auto: Detected finish_reason='xml_tool_limit_reached', stop auto-continue")
+ auto_continue = False
+ break
+ elif finish_reason == 'non_tool_call':
+ logging.warning(f"TaskEngine run_task_auto: Detected finish_reason='non_tool_call', stop auto-continue")
  auto_continue = False
  break
  elif finish_reason == 'stop' or finish_reason == 'length': # 'length' never occur
  auto_continue = True
  auto_continue_count += 1
  update_continuous_state(auto_continue_count, auto_continue)
- logging.info(f"run_task_auto: Detected finish_reason='{finish_reason}', auto-continuing ({auto_continue_count}/{self.max_auto_run})")
+ logging.info(f"TaskEngine run_task_auto: Detected finish_reason='{finish_reason}', auto-continuing ({auto_continue_count}/{self.max_auto_run})")
  except Exception as parse_error:
- logging.error(f"run_task_auto:
- content = {"role": "system", "status_type": "error", "message": "Parse response chunk Error"}
+ logging.error(f"TaskEngine run_task_auto: Parse chunk error, chunk: {chunk}")
  handle_error(parse_error)
+ self.task_langfuse.root_span.event(name="engine_parse_chunk_error", level="ERROR",
+ status_message=(f"Task Engine parse chunk error: {parse_error}"),
+ metadata={"content": chunk})
+
+ content = {"role": "system", "status_type": "error", "message": "Parse response chunk Error"}
  error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
  yield format_for_yield(error_msg)
  except Exception as run_error:
- logging.error(f"run_task_auto: Call task_run_once error: {
- content = {"role": "system", "status_type": "error", "message": "Call task_run_once error"}
+ logging.error(f"TaskEngine run_task_auto: Call task_run_once error: {run_error}")
  handle_error(run_error)
+ self.task_langfuse.root_span.event(name="engine_task_run_once_error", level="ERROR",
+ status_message=(f"Call task_run_once error: {run_error}"))
+
+ content = {"role": "system", "status_type": "error", "message": "Call run_task_once error"}
  error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
  yield format_for_yield(error_msg)

@@ -191,21 +204,25 @@ class XGATaskEngine:
  }
  llm_messages.append(temp_assistant_message)

-
- langfuse_metadata = self.task_langfuse.create_llm_langfuse_meta(
+ auto_count = continuous_state.get("auto_continue_count")
+ langfuse_metadata = self.task_langfuse.create_llm_langfuse_meta(auto_count)

+ self.task_langfuse.root_span.event(name="engine_start_create_completion", level="DEFAULT",
+ status_message=(f"Task Engine start create_completion llm_messages len={len(llm_messages)}"))
  llm_response = await self.llm_client.create_completion(llm_messages, langfuse_metadata)
  response_processor = self._create_response_processer()

  async for chunk in response_processor.process_response(llm_response, llm_messages, continuous_state):
- self._logging_reponse_chunk(chunk)
+ self._logging_reponse_chunk(chunk, auto_count)
  yield chunk

- def
+ def parse_final_result(self, chunks: List[Dict[str, Any]]) -> XGATaskResult:
  final_result: XGATaskResult = None
+ reverse_chunks = reversed(chunks)
+ chunk = None
  try:
  finish_reason = ''
- for chunk in
+ for chunk in reverse_chunks:
  chunk_type = chunk.get("type")
  if chunk_type == "status":
  status_content = json.loads(chunk.get('content', '{}'))
@@ -215,10 +232,7 @@ class XGATaskEngine:
  final_result = XGATaskResult(type="error", content=error)
  elif status_type == "finish":
  finish_reason = status_content.get('finish_reason', None)
-
- error = "Completed due to over task max_auto_run limit !"
- final_result = XGATaskResult(type="error", content=error)
- elif chunk_type == "tool" and finish_reason in ['completed', 'stop']:
+ elif chunk_type == "tool" and finish_reason in ['completed', 'stop', 'xml_tool_limit_reached']:
  tool_content = json.loads(chunk.get('content', '{}'))
  tool_execution = tool_content.get('tool_execution')
  tool_name = tool_execution.get('function_name')
@@ -238,22 +252,33 @@ class XGATaskEngine:
  result_type = "answer" if success else "error"
  result_content = f"Task execute '{tool_name}' {result_type}: {output}"
  final_result = XGATaskResult(type=result_type, content=result_content)
- elif chunk_type == "
+ elif chunk_type == "assistant" and finish_reason == 'non_tool_call':
  assis_content = chunk.get('content', {})
  result_content = assis_content.get("content", "LLM output is empty")
  final_result = XGATaskResult(type="answer", content=result_content)

- if final_result
+ if final_result:
  break
+
+ if final_result and finish_reason == "completed":
+ logging.info(f"✅ FINAL_RESULT: finish_reason={finish_reason}, final_result={final_result}")
+ elif final_result is not None:
+ logging.warning(f"⚠️ FINAL_RESULT: finish_reason={finish_reason}, final_result={final_result}")
+ else:
+ logging.warning(f"❌ FINAL_RESULT: LLM Result is EMPTY, finish_reason={finish_reason}")
+ final_result = XGATaskResult(type="error", content="LLM has no answer")
  except Exception as e:
- logging.error(f"parse_final_result:
- final_result = XGATaskResult(type="error", content="Parse final result failed!")
+ logging.error(f"TaskEngine parse_final_result: Parse message chunk error, chunk: {chunk}")
  handle_error(e)
+ self.task_langfuse.root_span.event(name="engine_parse_final_result_error", level="ERROR",
+ status_message=(f"Task Engine parse final result error: {e}"),
+ metadata={"content": chunk})

-
+ final_result = XGATaskResult(type="error", content="Parse final result failed!")

+ return final_result

- def
+ def create_response_message(self, type: XGAResponseMsgType,
  content: Union[Dict[str, Any], List[Any], str],
  is_llm_message: bool,
  metadata: Optional[Dict[str, Any]]=None)-> XGAResponseMessage:
@@ -271,10 +296,17 @@ class XGATaskEngine:
  content = content,
  metadata = metadata
  )
- self.task_response_msgs.append(message)

  return message

+ def add_response_message(self, type: XGAResponseMsgType,
+ content: Union[Dict[str, Any], List[Any], str],
+ is_llm_message: bool,
+ metadata: Optional[Dict[str, Any]]=None)-> XGAResponseMessage:
+ message = self.create_response_message(type, content, is_llm_message, metadata)
+ self.task_response_msgs.append(message)
+ return message
+
  def get_history_llm_messages (self) -> List[Dict[str, Any]]:
  llm_messages = []
  for message in self.task_response_msgs:
@@ -284,13 +316,12 @@ class XGATaskEngine:
  response_llm_contents = []
  for llm_message in llm_messages:
  content = llm_message["content"]
- # @todo content List type
  if isinstance(content, str):
  try:
  _content = json.loads(content)
  response_llm_contents.append(_content)
  except json.JSONDecodeError as e:
- logging.error(f"
+ logging.error(f"TaskEngine get_history_llm_messages: Failed to decode json, content: {content}")
  handle_error(e)
  else:
  response_llm_contents.append(content)
@@ -315,9 +346,11 @@ class XGATaskEngine:
  "task_no": self.task_no,
  "model_name": self.model_name,
  "max_xml_tool_calls": 0,
+ "use_assistant_chunk_msg": self.use_assistant_chunk_msg,
  "tool_execution_strategy": "parallel" if self.tool_exec_parallel else "sequential", # ,
  "xml_adding_strategy": "user_message",
  "add_response_msg_func": self.add_response_message,
+ "create_response_msg_func": self.create_response_message,
  "tool_box": self.tool_box,
  "task_langfuse": self.task_langfuse,
  }
@@ -328,37 +361,52 @@ class XGATaskEngine:
  return XGATaskLangFuse(self.session_id, self.task_id, self.task_run_id, self.task_no, self.agent_id)


- def _logging_reponse_chunk(self, chunk):
-
-
-
-
-
-
-
-
-
-
-
-
+ def _logging_reponse_chunk(self, chunk, auto_count: int)-> None:
+ try:
+ chunk_type = chunk.get('type', 'unknown')
+ prefix = ""
+ if chunk_type == 'status':
+ content = json.loads(chunk.get('content', '{}'))
+ status_type = content.get('status_type', "empty")
+ if status_type in ["tool_started", "tool_completed"]:
+ return
+ prefix = "-" + status_type
+ elif chunk_type == 'tool':
+ tool_content = json.loads(chunk.get('content', '{}'))
+ tool_execution = tool_content.get('tool_execution')
+ tool_name = tool_execution.get('function_name')
+ prefix = "-" + tool_name
+
+ content = chunk.get('content', '')
+ pretty_content = content
+ if isinstance(content, dict):
+ pretty_content = json.dumps(content, ensure_ascii=False, indent=2)
+
+ if chunk_type == "assistant_chunk":
+ logging.debug(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}> content: {pretty_content}")
+ else:
+ logging.info(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}> content: {pretty_content}")

-
+ except Exception as e:
+ logging.error(f"TaskEngine logging_reponse_chunk: Decorate chunk log error, chunk: {chunk}")
+ handle_error(e)


  if __name__ == "__main__":
  import asyncio
  from xgae.utils.misc import read_file
+ from xgae.utils.setup_env import setup_logging
+
+ setup_logging()

  async def main():
  # Before Run Exec: uv run custom_fault_tools
  tool_box = XGAMcpToolBox(custom_mcp_server_file="mcpservers/custom_servers.json")
- system_prompt = read_file("templates/
+ system_prompt = read_file("templates/example/fault_user_prompt.txt")
  engine = XGATaskEngine(tool_box=tool_box,
  general_tools=[],
  custom_tools=["*"],
- llm_config=LLMConfig(stream=False),
  system_prompt=system_prompt,
- max_auto_run=8,
  session_id="session_1",
  agent_id="agent_1",)
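The new create_response_message / add_response_message split separates building a response message from recording it in task_response_msgs, and the response-processor config now receives both functions, presumably so processors can build chunk messages without persisting them. A minimal sketch of that pattern, with plain dicts standing in for XGAResponseMessage and all names below illustrative only:

    from typing import Any, Dict, List, Optional
    from uuid import uuid4

    class MiniEngine:
        def __init__(self) -> None:
            self.task_response_msgs: List[Dict[str, Any]] = []

        def create_response_message(self, type: str, content: Any, is_llm_message: bool,
                                    metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
            # Build the message only; the caller decides whether to record it.
            return {"message_id": str(uuid4()), "type": type, "content": content,
                    "is_llm_message": is_llm_message, "metadata": metadata or {}}

        def add_response_message(self, type: str, content: Any, is_llm_message: bool,
                                 metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
            # Build and record, mirroring the new add_response_message wrapper.
            message = self.create_response_message(type, content, is_llm_message, metadata)
            self.task_response_msgs.append(message)
            return message

    engine = MiniEngine()
    engine.create_response_message("assistant_chunk", "partial text", is_llm_message=True)  # not stored
    engine.add_response_message("status", {"status_type": "finish"}, is_llm_message=False)  # stored
    print(len(engine.task_response_msgs))  # 1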
xgae/engine/task_langfuse.py
CHANGED
@@ -2,20 +2,22 @@
  from typing import Any, Dict, Optional
  from langfuse import Langfuse

- from xgae.utils.setup_env import setup_langfuse
+ from xgae.utils.setup_env import setup_langfuse
  from xgae.utils.llm_client import LangfuseMetadata
  from xgae.engine.engine_base import XGATaskResult

- setup_env_logging()
- langfuse:Langfuse = setup_langfuse()
-
  class XGATaskLangFuse:
+ langfuse: Langfuse = None
+
  def __init__(self,
  session_id: str,
  task_id:str,
  task_run_id: str,
  task_no: int,
  agent_id: str) -> None:
+ if XGATaskLangFuse.langfuse is None:
+ XGATaskLangFuse.langfuse = setup_langfuse()
+
  self.session_id = session_id
  self.task_id = task_id
  self.task_run_id = task_run_id
@@ -35,9 +37,9 @@ class XGATaskLangFuse:
  trace = None
  if trace_id:
  self.trace_id = trace_id
- trace = langfuse.trace(id=trace_id)
+ trace = XGATaskLangFuse.langfuse.trace(id=trace_id)
  else:
- trace = langfuse.trace(name="xga_task_engine")
+ trace = XGATaskLangFuse.langfuse.trace(name="xga_task_engine")
  self.trace_id = trace.id

  metadata = {"task_id": self.task_id, "session_id": self.session_id, "agent_id": self.agent_id}
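The module-level Langfuse client created at import time is replaced by a class attribute that is initialized lazily, so setup_langfuse() only runs when the first XGATaskLangFuse is constructed. A minimal sketch of the same lazy class-level pattern, with a hypothetical make_client() standing in for setup_langfuse():

    from typing import Optional

    def make_client() -> object:
        print("client created once")
        return object()

    class TaskTracer:
        client: Optional[object] = None  # shared by all instances

        def __init__(self, session_id: str) -> None:
            if TaskTracer.client is None:        # first construction pays the setup cost
                TaskTracer.client = make_client()
            self.session_id = session_id

    TaskTracer("session_1")
    TaskTracer("session_2")  # reuses the client created above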
xgae/tools/without_general_tools_app.py
CHANGED
@@ -3,7 +3,6 @@ from pydantic import Field

  from mcp.server.fastmcp import FastMCP

- from xgae.engine.engine_base import XGAToolResult

  mcp = FastMCP(name="XGAE Message Tools")

@@ -17,7 +16,7 @@ async def complete(task_id: str,
  description="Comma-separated list of final outputs. Use when: 1) Completion relates to files 2) User needs to review outputs 3) Deliverables in files")]
  ):
  print(f"<XGAETools-complete>: task_id={task_id}, text={text}, attachments={attachments}")
- return
+ return {"status": "complete"}


  @mcp.tool(
@@ -30,7 +29,7 @@ async def ask(task_id: str,
  description="Comma-separated list of files/URLs to attach. Use when: 1) Question relates to files/configs 2) User needs to review content 3) Options documented in files 4) Supporting evidence needed")]
  ):
  print(f"<XGAETools-ask>: task_id={task_id}, text={text}, attachments={attachments}")
- return
+ return {"status": "Awaiting user response..."}

  @mcp.tool(
  description="end task, destroy sandbox"
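The complete and ask tools now return small status dicts instead of None, which gives callers something concrete to branch on. A hypothetical caller-side check (the real result shape beyond the "status" key is not shown in this diff):

    def handle_tool_result(result: dict) -> str:
        # Branch on the status string the tools now return.
        status = result.get("status", "")
        return "task finished" if status == "complete" else f"pending: {status}"

    print(handle_tool_result({"status": "complete"}))                   # task finished
    print(handle_tool_result({"status": "Awaiting user response..."}))  # pending: Awaiting user response...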
xgae/utils/__init__.py
CHANGED
@@ -8,8 +8,8 @@ def handle_error(e: Exception) -> None:
  raise (e) from e


- def to_bool(value:
+ def to_bool(value: any) -> bool:
  if value is None:
  return False

- return
+ return str(value).lower() == "true"
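to_bool returns False for None and otherwise compares the lowercased string form against "true", so only "true"/"True"/"TRUE" (or the bool True) count as truthy; strings like "1" or "yes" do not. A quick check of that behavior with an environment variable:

    import os

    def to_bool(value) -> bool:  # same logic as the helper added above
        if value is None:
            return False
        return str(value).lower() == "true"

    os.environ["USE_ASSISTANT_CHUNK_MSG"] = "True"
    print(to_bool(os.getenv("USE_ASSISTANT_CHUNK_MSG", False)))  # True
    print(to_bool("false"), to_bool("1"), to_bool(None))         # False False False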
xgae/utils/json_helpers.py
CHANGED
@@ -159,10 +159,10 @@ def format_for_yield(message_object: Dict[str, Any]) -> Dict[str, Any]:

  # Ensure content is a JSON string
  if 'content' in formatted and not isinstance(formatted['content'], str):
- formatted['content'] = json.dumps(formatted['content'])
+ formatted['content'] = json.dumps(formatted['content'], ensure_ascii=False, indent=2)

  # Ensure metadata is a JSON string
  if 'metadata' in formatted and not isinstance(formatted['metadata'], str):
- formatted['metadata'] = json.dumps(formatted['metadata'])
+ formatted['metadata'] = json.dumps(formatted['metadata'], ensure_ascii=False, indent=2)

  return formatted
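Adding ensure_ascii=False keeps non-ASCII characters readable instead of escaping them as \uXXXX sequences, and indent=2 pretty-prints the serialized content. A small standard-library illustration:

    import json

    payload = {"status": "完成", "count": 2}
    print(json.dumps(payload))                                # {"status": "\u5b8c\u6210", "count": 2}
    print(json.dumps(payload, ensure_ascii=False, indent=2))  # keeps 完成 and spreads keys over lines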
xgae/utils/llm_client.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Union, Dict, Any, Optional, List, TypedDict
  from openai import OpenAIError
  from litellm.utils import ModelResponse, CustomStreamWrapper

+ from xgae.utils import to_bool
  from xgae.utils.setup_env import setup_langfuse

  class LLMConfig(TypedDict, total=False):
@@ -18,8 +19,8 @@ class LLMConfig(TypedDict, total=False):
  api_base: str # Optional API base URL, Override .env LLM_API_BASE
  temperature: float # temperature: Optional Sampling temperature (0-1), Override .env LLM_TEMPERATURE
  max_tokens: int # max_tokens: Optional Maximum tokens in the response, Override .env LLM_MAX_TOKENS
- stream: bool # stream: Optional whether to stream the response,
- enable_thinking: bool # Optional whether to enable thinking,
+ stream: bool # stream: Optional whether to stream the response, Override .env LLM_STREAM
+ enable_thinking: bool # Optional whether to enable thinking, Override .env LLM_ENABLE_THINKING
  reasoning_effort: str # Optional level of reasoning effort, default is ‘low’
  response_format: str # response_format: Optional desired format for the response, default is None
  top_p: int # Optional Top-p sampling parameter, default is None
@@ -46,6 +47,7 @@ class LLMClient:
  def __init__(self, llm_config: LLMConfig=None):
  litellm.modify_params = True
  litellm.drop_params = True
+
  self._init_langfuse()

  llm_config = llm_config or LLMConfig()
@@ -56,6 +58,8 @@ class LLMClient:
  env_llm_api_base = os.getenv("LLM_API_BASE", "https://dashscope.aliyuncs.com/compatible-mode/v1")
  env_llm_max_tokens = int(os.getenv("LLM_MAX_TOKENS", 16384))
  env_llm_temperature = float(os.getenv("LLM_TEMPERATURE", 0.7))
+ env_llm_stream = to_bool(os.getenv("LLM_STREAM", False))
+ env_llm_enable_thinking = to_bool(os.getenv("LLM_ENABLE_THINKING", False))

  llm_config_params = {
  "model": llm_config.get("model", env_llm_model),
@@ -65,8 +69,8 @@ class LLMClient:
  "api_base": llm_config.get("api_base", env_llm_api_base),
  "temperature": llm_config.get("temperature", env_llm_temperature),
  "max_tokens": llm_config.get("max_tokens", env_llm_max_tokens),
- "stream": llm_config.get("stream",
- "enable_thinking": llm_config.get("enable_thinking",
+ "stream": llm_config.get("stream", env_llm_stream),
+ "enable_thinking": llm_config.get("enable_thinking", env_llm_enable_thinking),
  "reasoning_effort": llm_config.get("reasoning_effort", 'low'),
  "response_format": llm_config.get("response_format", None),
  "top_p": llm_config.get("top_p", None),
@@ -78,22 +82,27 @@ class LLMClient:
  self.is_stream = llm_config_params['stream']

  self.lite_llm_params = self._prepare_llm_params(llm_config_params)
- logging.info(f"
+ logging.info(f"=== LLMClient initialed : model={self.model_name}, is_stream={self.is_stream}, enable thinking={self.lite_llm_params['enable_thinking']}")

  @staticmethod
  def _init_langfuse():
  if not LLMClient.langfuse_inited:
  LLMClient.langfuse_inited =True
-
-
-
-
-
-
+
+ env_llm_langfuse_enable = to_bool(os.getenv("LLM_LANGFUSE_ENABLE", False))
+ if env_llm_langfuse_enable:
+ env_langfuse = setup_langfuse()
+ if env_langfuse and env_langfuse.enabled:
+ litellm.success_callback = ["langfuse"]
+ litellm.failure_callback = ["langfuse"]
+ LLMClient.langfuse_enabled = True
+ logging.info("🛠️ LiteLLM Langfuse is enable !")
+ else:
+ LLMClient.langfuse_enabled = False
+ logging.warning("🛠️ LiteLLM Langfuse is disable, langfuse.enabled=false !")
  else:
  LLMClient.langfuse_enabled = False
- logging.warning("
-
+ logging.warning("🛠️ LiteLLM Langfuse is disable, LLM_LANGFUSE_ENABLE=False !")

  def _prepare_llm_params(self, llm_config_params: Dict[str, Any]) -> Dict[str, Any]:
  prepared_llm_params = llm_config_params.copy()
@@ -108,27 +117,27 @@ class LLMClient:
  # as it causes errors with inference profiles
  if model_name.startswith("bedrock/") and "claude-3-7" in model_name:
  prepared_llm_params.pop("max_tokens")
- logging.debug(f"prepare_llm_params: Remove 'max_tokens' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Remove 'max_tokens' param for model: {model_name}")
  else:
  is_openai_o_series = 'o1' in model_name
  is_openai_gpt5 = 'gpt-5' in model_name
  param_name = "max_completion_tokens" if (is_openai_o_series or is_openai_gpt5) else "max_tokens"
  if param_name == "max_completion_tokens":
  prepared_llm_params[param_name] = max_tokens
- logging.debug(f"prepare_llm_params: Add 'max_completion_tokens' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'max_completion_tokens' param for model: {model_name}")

  # # Add Claude-specific headers
  if "claude" in model_name.lower() or "anthropic" in model_name.lower():
  prepared_llm_params["extra_headers"] = {
  "anthropic-beta": "output-128k-2025-02-19"
  }
- logging.debug(f"prepare_llm_params: Add 'extra_headers' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'extra_headers' param for model: {model_name}")

  # Add Bedrock-specific parameters
  if model_name.startswith("bedrock/"):
  if not model_id and "anthropic.claude-3-7-sonnet" in model_name:
  prepared_llm_params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
- logging.debug(f"prepare_llm_params: Must Set 'model_id' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Must Set 'model_id' param for model: {model_name}")

  # Apply Anthropic prompt caching (minimal implementation)
  effective_model_name = llm_config_params.get("model", model_name)
@@ -136,14 +145,14 @@ class LLMClient:
  # OpenAI GPT-5: drop unsupported temperature param (only default 1 allowed)
  if "gpt-5" in effective_model_name and "temperature" in llm_config_params and llm_config_params["temperature"] != 1:
  prepared_llm_params.pop("temperature", None)
- logging.debug(f"prepare_llm_params: Remove 'temperature' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Remove 'temperature' param for model: {model_name}")

  # OpenAI GPT-5: request priority service tier when calling OpenAI directly
  # Pass via both top-level and extra_body for LiteLLM compatibility
  if "gpt-5" in effective_model_name and not effective_model_name.startswith("openrouter/"):
  prepared_llm_params["service_tier"] = "priority"
  prepared_llm_params["extra_body"] = {"service_tier": "priority"}
- logging.debug(f"prepare_llm_params: Add 'service_tier' and 'extra_body' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'service_tier' and 'extra_body' param for model: {model_name}")

  # Add reasoning_effort for Anthropic models if enabled
  enable_thinking = llm_config_params.get("enable_thinking")
@@ -156,14 +165,14 @@ class LLMClient:
  prepared_llm_params["provider"] = {
  "order": ["together/fp8", "novita/fp8", "baseten/fp8", "moonshotai", "groq"]
  }
- logging.debug(f"prepare_llm_params: Add 'provider' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'provider' param for model: {model_name}")

  reasoning_effort = llm_config_params.get("reasoning_effort")
  if is_anthropic and use_thinking:
  effort_level = reasoning_effort if reasoning_effort else 'low'
  prepared_llm_params["reasoning_effort"] = effort_level
  prepared_llm_params["temperature"] = 1.0 # Required by Anthropic when reasoning_effort is used
- logging.debug(f"prepare_llm_params: Set 'temperature'=1.0 param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Set 'temperature'=1.0 param for model: {model_name}")

  return prepared_llm_params

@@ -197,7 +206,7 @@ class LLMClient:
  {"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}
  ]
  cache_control_count += 1
- logging.debug(f"prepare_complete_params: Add 'cache_control' in message content, for model: {model_name}")
+ logging.debug(f"LLMClient prepare_complete_params: Add 'cache_control' in message content, for model: {model_name}")
  elif isinstance(content, list):
  for item in content:
  if cache_control_count >= max_cache_control_blocks:
@@ -205,7 +214,7 @@ class LLMClient:
  if isinstance(item, dict) and item.get("type") == "text" and "cache_control" not in item:
  item["cache_control"] = {"type": "ephemeral"}
  cache_control_count += 1
- logging.debug(f"prepare_complete_params: Add 'cache_control' in message content list, for model: {model_name}")
+ logging.debug(f"LLMClient prepare_complete_params: Add 'cache_control' in message content list, for model: {model_name}")

  return complete_params

@@ -225,18 +234,18 @@ class LLMClient:
  last_error = None
  for attempt in range(self.max_retries):
  try:
- logging.info(f"*** create_completion
+ logging.info(f"*** LLMClient create_completion: LLM '{self.model_name}' completion attempt {attempt + 1}/{self.max_retries}")
  response = await litellm.acompletion(**complete_params)
  return response
  except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
  last_error = e
  await self._handle_llm_error(e, attempt)
  except Exception as e:
- logging.error(f"create_completion: Unexpected error during LLM completion: {str(e)}", exc_info=True)
- raise LLMError(f"
+ logging.error(f"LLMClient create_completion: Unexpected error during LLM completion: {str(e)}", exc_info=True)
+ raise LLMError(f"LLMClient create completion failed: {e}")

- logging.error(f"create_completion: LLM completion failed after {self.max_retries} attempts: {last_error}", exc_info=True)
- raise LLMError(f"
+ logging.error(f"LLMClient create_completion: LLM completion failed after {self.max_retries} attempts: {last_error}", exc_info=True)
+ raise LLMError(f"LLMClient create completion failed after {self.max_retries} attempts !")

  if __name__ == "__main__":
  from xgae.utils.setup_env import setup_logging
@@ -244,14 +253,16 @@ if __name__ == "__main__":
  setup_logging()
  langfuse = setup_langfuse()

- async def
+ async def main():
  llm_client = LLMClient(LLMConfig(stream=False))

  messages = [{"role": "user", "content": "1+1="}]
  trace_id = langfuse.trace(name = "xgae_litellm_test").trace_id
+ await asyncio.sleep(1)
+
  meta = LangfuseMetadata(
  generation_name="llm_completion_test",
- generation_id="
+ generation_id="generation_id_0",
  existing_trace_id=trace_id,
  session_id="session_0",
  )
@@ -269,7 +280,6 @@ if __name__ == "__main__":
  else:
  print(response.choices[0].message.content)

-
- asyncio.run(llm_completion())
+ asyncio.run(main())

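For stream and enable_thinking the resolution order is: an explicit LLMConfig value first, then the .env default parsed with to_bool. A minimal sketch of that precedence (dict access simplified; the env var name is taken from the diff):

    import os

    def to_bool(value) -> bool:  # same helper as xgae.utils.to_bool
        if value is None:
            return False
        return str(value).lower() == "true"

    def resolve_stream(llm_config: dict) -> bool:
        # Explicit config wins; otherwise fall back to the LLM_STREAM env var.
        env_llm_stream = to_bool(os.getenv("LLM_STREAM", False))
        return llm_config.get("stream", env_llm_stream)

    os.environ["LLM_STREAM"] = "true"
    print(resolve_stream({}))                 # True, taken from the environment
    print(resolve_stream({"stream": False}))  # False, explicit config overrides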
xgae/utils/setup_env.py
CHANGED
@@ -52,7 +52,7 @@ def setup_logging(log_file: str=None, log_level: str="INFO") :

  logger.setLevel(logging_level)

- logging.info(f"
+ logging.info(f"🛠️ XGA_LOGGING is initialized, log_level={log_level}, log_file={log_file}")

  def setup_env_logging():
  log_enable = to_bool(os.getenv("LOG_ENABLE", True))
@@ -60,6 +60,7 @@ def setup_env_logging():
  log_file = os.getenv("LOG_FILE", "log/xga.log")
  if log_enable :
  setup_logging(log_file, log_level)
+ setup_logging(log_file, log_level)

  def setup_langfuse() -> Langfuse:
  env_public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
@@ -71,10 +72,10 @@ def setup_langfuse() -> Langfuse:
  secret_key=env_secret_key,
  host=env_host)

- logging.info("
+ logging.info("🛠️ XGA_LANGFUSE initialized Successfully by Key !")
  else:
  _langfuse = Langfuse(enabled=False)
- logging.warning("
+ logging.warning("🛠️ XGA_LANGFUSE Not set key, Langfuse is disabled!")

  return _langfuse

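setup_env_logging reads LOG_ENABLE with to_bool(os.getenv("LOG_ENABLE", True)); when the variable is unset, os.getenv returns the bool default True, whose string form lowercases to "true", so logging stays enabled unless LOG_ENABLE is explicitly set to something other than "true". A quick check of that interplay:

    import os

    def to_bool(value) -> bool:  # same helper as xgae.utils.to_bool
        if value is None:
            return False
        return str(value).lower() == "true"

    os.environ.pop("LOG_ENABLE", None)
    print(to_bool(os.getenv("LOG_ENABLE", True)))   # True  (default bool True -> "true")
    os.environ["LOG_ENABLE"] = "false"
    print(to_bool(os.getenv("LOG_ENABLE", True)))   # False (explicitly disabled)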