xgae 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xgae might be problematic.
- xgae/cli_app.py +2 -4
- xgae/engine/engine_base.py +3 -3
- xgae/engine/mcp_tool_box.py +4 -4
- xgae/engine/responser/non_stream_responser.py +31 -39
- xgae/engine/responser/responser_base.py +42 -40
- xgae/engine/responser/stream_responser.py +95 -782
- xgae/engine/task_engine.py +79 -46
- xgae/tools/without_general_tools_app.py +2 -3
- xgae/utils/__init__.py +2 -2
- xgae/utils/json_helpers.py +2 -2
- xgae/utils/llm_client.py +21 -19
- xgae/utils/setup_env.py +1 -0
- {xgae-0.1.10.dist-info → xgae-0.1.12.dist-info}/METADATA +1 -1
- xgae-0.1.12.dist-info/RECORD +21 -0
- xgae-0.1.10.dist-info/RECORD +0 -21
- {xgae-0.1.10.dist-info → xgae-0.1.12.dist-info}/WHEEL +0 -0
- {xgae-0.1.10.dist-info → xgae-0.1.12.dist-info}/entry_points.txt +0 -0
xgae/engine/task_engine.py
CHANGED
@@ -5,7 +5,7 @@ import os
 from typing import List, Any, Dict, Optional, AsyncGenerator, Union, Literal
 from uuid import uuid4
 
-from xgae.utils import handle_error
+from xgae.utils import handle_error, to_bool
 from xgae.utils.llm_client import LLMClient, LLMConfig
 from xgae.utils.json_helpers import format_for_yield
 
@@ -46,6 +46,8 @@ class XGATaskEngine:
 
         max_auto_run = max_auto_run if max_auto_run else int(os.getenv("MAX_AUTO_RUN", 15))
         self.max_auto_run: int = 1 if max_auto_run <= 1 else max_auto_run
+
+        self.use_assistant_chunk_msg = to_bool(os.getenv("USE_ASSISTANT_CHUNK_MSG", False))
         self.tool_exec_parallel = True if tool_exec_parallel is None else tool_exec_parallel
 
         self.task_no = -1
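Note: the new USE_ASSISTANT_CHUNK_MSG flag is parsed with to_bool, now imported from xgae.utils. Its implementation is not shown in this diff; a minimal sketch of what such a helper typically does (an assumption, not the package's actual code) is:

# Hypothetical sketch; the real xgae.utils.to_bool may differ.
def to_bool(value) -> bool:
    # Accept real booleans as-is, parse common truthy strings from env vars.
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in ("1", "true", "yes", "on")

With a helper like this, os.getenv("USE_ASSISTANT_CHUNK_MSG", False) stays False when the variable is unset and becomes True for values such as "1" or "true".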
@@ -67,7 +69,7 @@ class XGATaskEngine:
             chunks.append(chunk)
 
         if len(chunks) > 0:
-            final_result = self.
+            final_result = self.parse_final_result(chunks)
         else:
             final_result = XGATaskResult(type="error", content="LLM Answer is Empty")
 
@@ -123,16 +125,16 @@ class XGATaskEngine:
 
 
     async def _run_task_auto(self) -> AsyncGenerator[Dict[str, Any], None]:
-        def update_continuous_state(_auto_continue_count, _auto_continue):
-            continuous_state["auto_continue_count"] = _auto_continue_count
-            continuous_state["auto_continue"] = _auto_continue
-
         continuous_state: TaskRunContinuousState = {
             "accumulated_content": "",
             "auto_continue_count": 0,
             "auto_continue": False if self.max_auto_run <= 1 else True
         }
 
+        def update_continuous_state(_auto_continue_count, _auto_continue):
+            continuous_state["auto_continue_count"] = _auto_continue_count
+            continuous_state["auto_continue"] = _auto_continue
+
         auto_continue_count = 0
         auto_continue = True
         while auto_continue and auto_continue_count < self.max_auto_run:
@@ -146,38 +148,45 @@ class XGATaskEngine:
                         content = json.loads(chunk.get('content', '{}'))
                         status_type = content.get('status_type', None)
                         if status_type == "error":
-                            logging.error(f"run_task_auto: task_response error: {chunk.get('message', 'Unknown error')}")
+                            logging.error(f"TaskEngine run_task_auto: task_response error: {chunk.get('message', 'Unknown error')}")
                             auto_continue = False
                             break
                         elif status_type == 'finish':
                             finish_reason = content.get('finish_reason', None)
                             if finish_reason == 'completed':
-                                logging.info(f"run_task_auto: Detected finish_reason='completed', TASK_COMPLETE Success !")
+                                logging.info(f"TaskEngine run_task_auto: Detected finish_reason='completed', TASK_COMPLETE Success !")
                                 auto_continue = False
                                 break
                             elif finish_reason == 'xml_tool_limit_reached':
-                                logging.warning(f"run_task_auto: Detected finish_reason='xml_tool_limit_reached', stop auto-continue")
+                                logging.warning(f"TaskEngine run_task_auto: Detected finish_reason='xml_tool_limit_reached', stop auto-continue")
                                 auto_continue = False
                                 break
                             elif finish_reason == 'non_tool_call':
-                                logging.warning(f"run_task_auto: Detected finish_reason='non_tool_call', stop auto-continue")
+                                logging.warning(f"TaskEngine run_task_auto: Detected finish_reason='non_tool_call', stop auto-continue")
                                 auto_continue = False
                                 break
                             elif finish_reason == 'stop' or finish_reason == 'length': # 'length' never occur
                                 auto_continue = True
                                 auto_continue_count += 1
                                 update_continuous_state(auto_continue_count, auto_continue)
-                                logging.info(f"run_task_auto: Detected finish_reason='{finish_reason}', auto-continuing ({auto_continue_count}/{self.max_auto_run})")
+                                logging.info(f"TaskEngine run_task_auto: Detected finish_reason='{finish_reason}', auto-continuing ({auto_continue_count}/{self.max_auto_run})")
                     except Exception as parse_error:
-                        logging.error(f"run_task_auto:
-                        content = {"role": "system", "status_type": "error", "message": "Parse response chunk Error"}
+                        logging.error(f"TaskEngine run_task_auto: Parse chunk error, chunk: {chunk}")
                         handle_error(parse_error)
+                        self.task_langfuse.root_span.event(name="engine_parse_chunk_error", level="ERROR",
+                                                           status_message=(f"Task Engine parse chunk error: {parse_error}"),
+                                                           metadata={"content": chunk})
+
+                        content = {"role": "system", "status_type": "error", "message": "Parse response chunk Error"}
                         error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
                         yield format_for_yield(error_msg)
             except Exception as run_error:
-                logging.error(f"run_task_auto: Call task_run_once error: {
-                content = {"role": "system", "status_type": "error", "message": "Call task_run_once error"}
+                logging.error(f"TaskEngine run_task_auto: Call task_run_once error: {run_error}")
                 handle_error(run_error)
+                self.task_langfuse.root_span.event(name="engine_task_run_once_error", level="ERROR",
+                                                   status_message=(f"Call task_run_once error: {run_error}"))
+
+                content = {"role": "system", "status_type": "error", "message": "Call run_task_once error"}
                 error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
                 yield format_for_yield(error_msg)
 
@@ -198,6 +207,8 @@ class XGATaskEngine:
         auto_count = continuous_state.get("auto_continue_count")
         langfuse_metadata = self.task_langfuse.create_llm_langfuse_meta(auto_count)
 
+        self.task_langfuse.root_span.event(name="engine_start_create_completion", level="DEFAULT",
+                                           status_message=(f"Task Engine start create_completion llm_messages len={len(llm_messages)}"))
         llm_response = await self.llm_client.create_completion(llm_messages, langfuse_metadata)
         response_processor = self._create_response_processer()
 
@@ -205,11 +216,13 @@ class XGATaskEngine:
             self._logging_reponse_chunk(chunk, auto_count)
             yield chunk
 
-    def
+    def parse_final_result(self, chunks: List[Dict[str, Any]]) -> XGATaskResult:
         final_result: XGATaskResult = None
+        reverse_chunks = reversed(chunks)
+        chunk = None
         try:
             finish_reason = ''
-            for chunk in
+            for chunk in reverse_chunks:
                 chunk_type = chunk.get("type")
                 if chunk_type == "status":
                     status_content = json.loads(chunk.get('content', '{}'))
@@ -219,10 +232,7 @@ class XGATaskEngine:
                         final_result = XGATaskResult(type="error", content=error)
                     elif status_type == "finish":
                         finish_reason = status_content.get('finish_reason', None)
-
-                        error = "Completed due to over task max_auto_run limit !"
-                        final_result = XGATaskResult(type="error", content=error)
-                elif chunk_type == "tool" and finish_reason in ['completed', 'stop']:
+                elif chunk_type == "tool" and finish_reason in ['completed', 'stop', 'xml_tool_limit_reached']:
                     tool_content = json.loads(chunk.get('content', '{}'))
                     tool_execution = tool_content.get('tool_execution')
                     tool_name = tool_execution.get('function_name')
@@ -242,12 +252,12 @@ class XGATaskEngine:
                     result_type = "answer" if success else "error"
                     result_content = f"Task execute '{tool_name}' {result_type}: {output}"
                     final_result = XGATaskResult(type=result_type, content=result_content)
-                elif chunk_type == "
+                elif chunk_type == "assistant" and finish_reason == 'non_tool_call':
                     assis_content = chunk.get('content', {})
                     result_content = assis_content.get("content", "LLM output is empty")
                     final_result = XGATaskResult(type="answer", content=result_content)
 
-                if final_result
+                if final_result:
                     break
 
             if final_result and finish_reason == "completed":
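The reworked parse_final_result walks the response chunks from newest to oldest via reversed(chunks) and keeps the first chunk that yields a usable result. The pattern itself is plain standard-library Python; a standalone illustration (toy data, not the engine's actual chunk shapes) is:

# Toy illustration: scan newest-first, keep the first matching entry.
chunks = [{"type": "assistant"}, {"type": "tool"}, {"type": "status"}]
final = None
for chunk in reversed(chunks):      # newest chunk first
    if chunk["type"] == "tool":     # first match wins
        final = chunk
        break
print(final)                        # {'type': 'tool'}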
@@ -258,14 +268,17 @@ class XGATaskEngine:
             logging.warning(f"❌ FINAL_RESULT: LLM Result is EMPTY, finish_reason={finish_reason}")
             final_result = XGATaskResult(type="error", content="LLM has no answer")
         except Exception as e:
-            logging.error(f"parse_final_result:
-            final_result = XGATaskResult(type="error", content="Parse final result failed!")
+            logging.error(f"TaskEngine parse_final_result: Parse message chunk error, chunk: {chunk}")
             handle_error(e)
+            self.task_langfuse.root_span.event(name="engine_parse_final_result_error", level="ERROR",
+                                               status_message=(f"Task Engine parse final result error: {e}"),
+                                               metadata={"content": chunk})
 
-
+            final_result = XGATaskResult(type="error", content="Parse final result failed!")
 
+        return final_result
 
-    def
+    def create_response_message(self, type: XGAResponseMsgType,
                                 content: Union[Dict[str, Any], List[Any], str],
                                 is_llm_message: bool,
                                 metadata: Optional[Dict[str, Any]]=None)-> XGAResponseMessage:
@@ -283,10 +296,17 @@ class XGATaskEngine:
             content = content,
             metadata = metadata
         )
-        self.task_response_msgs.append(message)
 
         return message
 
+    def add_response_message(self, type: XGAResponseMsgType,
+                             content: Union[Dict[str, Any], List[Any], str],
+                             is_llm_message: bool,
+                             metadata: Optional[Dict[str, Any]]=None)-> XGAResponseMessage:
+        message = self.create_response_message(type, content, is_llm_message, metadata)
+        self.task_response_msgs.append(message)
+        return message
+
     def get_history_llm_messages (self) -> List[Dict[str, Any]]:
         llm_messages = []
         for message in self.task_response_msgs:
@@ -296,13 +316,12 @@ class XGATaskEngine:
         response_llm_contents = []
         for llm_message in llm_messages:
             content = llm_message["content"]
-            # @todo content List type
             if isinstance(content, str):
                 try:
                     _content = json.loads(content)
                     response_llm_contents.append(_content)
                 except json.JSONDecodeError as e:
-                    logging.error(f"
+                    logging.error(f"TaskEngine get_history_llm_messages: Failed to decode json, content: {content}")
                     handle_error(e)
             else:
                 response_llm_contents.append(content)
@@ -327,9 +346,11 @@ class XGATaskEngine:
             "task_no": self.task_no,
             "model_name": self.model_name,
             "max_xml_tool_calls": 0,
+            "use_assistant_chunk_msg": self.use_assistant_chunk_msg,
             "tool_execution_strategy": "parallel" if self.tool_exec_parallel else "sequential", # ,
             "xml_adding_strategy": "user_message",
             "add_response_msg_func": self.add_response_message,
+            "create_response_msg_func": self.create_response_message,
             "tool_box": self.tool_box,
             "task_langfuse": self.task_langfuse,
         }
@@ -340,21 +361,35 @@ class XGATaskEngine:
         return XGATaskLangFuse(self.session_id, self.task_id, self.task_run_id, self.task_no, self.agent_id)
 
 
-    def _logging_reponse_chunk(self, chunk, auto_count: int)
-
-
-
-
-
-
-
-
-
-
-
-
+    def _logging_reponse_chunk(self, chunk, auto_count: int)-> None:
+        try:
+            chunk_type = chunk.get('type', 'unknown')
+            prefix = ""
+            if chunk_type == 'status':
+                content = json.loads(chunk.get('content', '{}'))
+                status_type = content.get('status_type', "empty")
+                if status_type in ["tool_started", "tool_completed"]:
+                    return
+                prefix = "-" + status_type
+            elif chunk_type == 'tool':
+                tool_content = json.loads(chunk.get('content', '{}'))
+                tool_execution = tool_content.get('tool_execution')
+                tool_name = tool_execution.get('function_name')
+                prefix = "-" + tool_name
+
+            content = chunk.get('content', '')
+            pretty_content = content
+            if isinstance(content, dict):
+                pretty_content = json.dumps(content, ensure_ascii=False, indent=2)
+
+            if chunk_type == "assistant_chunk":
+                logging.debug(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}> content: {pretty_content}")
+            else:
+                logging.info(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}> content: {pretty_content}")
 
-
+        except Exception as e:
+            logging.error(f"TaskEngine logging_reponse_chunk: Decorate chunk log error, chunk: {chunk}")
+            handle_error(e)
 
 
 if __name__ == "__main__":
@@ -371,9 +406,7 @@ if __name__ == "__main__":
     engine = XGATaskEngine(tool_box=tool_box,
                            general_tools=[],
                            custom_tools=["*"],
-                           llm_config=LLMConfig(stream=False),
                            system_prompt=system_prompt,
-                           max_auto_run=8,
                            session_id="session_1",
                            agent_id="agent_1",)
 
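With llm_config and max_auto_run dropped from the __main__ example, streaming, thinking, the auto-run limit, and the assistant chunk flag now come from environment variables read elsewhere in this diff (LLM_STREAM, LLM_ENABLE_THINKING, MAX_AUTO_RUN, USE_ASSISTANT_CHUNK_MSG). A hedged sketch of driving that from code, reusing only the constructor arguments shown in the __main__ block above, is:

import os

# Assumed env-driven setup; the variable names are the ones read in this diff.
os.environ.setdefault("LLM_STREAM", "false")
os.environ.setdefault("LLM_ENABLE_THINKING", "false")
os.environ.setdefault("MAX_AUTO_RUN", "15")
os.environ.setdefault("USE_ASSISTANT_CHUNK_MSG", "false")

# engine = XGATaskEngine(tool_box=tool_box, general_tools=[], custom_tools=["*"],
#                        system_prompt=system_prompt, session_id="session_1", agent_id="agent_1")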
xgae/tools/without_general_tools_app.py
CHANGED
@@ -3,7 +3,6 @@ from pydantic import Field
 
 from mcp.server.fastmcp import FastMCP
 
-from xgae.engine.engine_base import XGAToolResult
 
 mcp = FastMCP(name="XGAE Message Tools")
 
@@ -17,7 +16,7 @@ async def complete(task_id: str,
                    description="Comma-separated list of final outputs. Use when: 1) Completion relates to files 2) User needs to review outputs 3) Deliverables in files")]
                    ):
     print(f"<XGAETools-complete>: task_id={task_id}, text={text}, attachments={attachments}")
-    return
+    return {"status": "complete"}
 
 
 @mcp.tool(
@@ -30,7 +29,7 @@ async def ask(task_id: str,
               description="Comma-separated list of files/URLs to attach. Use when: 1) Question relates to files/configs 2) User needs to review content 3) Options documented in files 4) Supporting evidence needed")]
               ):
     print(f"<XGAETools-ask>: task_id={task_id}, text={text}, attachments={attachments}")
-    return
+    return {"status": "Awaiting user response..."}
 
 @mcp.tool(
     description="end task, destroy sandbox"
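Both message tools now return a small status dict instead of a bare return (which yields None to the caller). A minimal FastMCP sketch of the same pattern, using only the constructs visible in this hunk (tool name and payload here are illustrative, not part of the package):

from mcp.server.fastmcp import FastMCP

mcp = FastMCP(name="Example Tools")

@mcp.tool(description="Example tool that reports completion")
async def done(task_id: str):
    # Returning a dict gives the MCP client a structured payload rather than None.
    return {"status": "complete", "task_id": task_id}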
xgae/utils/__init__.py
CHANGED
xgae/utils/json_helpers.py
CHANGED
@@ -159,10 +159,10 @@ def format_for_yield(message_object: Dict[str, Any]) -> Dict[str, Any]:
 
     # Ensure content is a JSON string
     if 'content' in formatted and not isinstance(formatted['content'], str):
-        formatted['content'] = json.dumps(formatted['content'])
+        formatted['content'] = json.dumps(formatted['content'], ensure_ascii=False, indent=2)
 
     # Ensure metadata is a JSON string
     if 'metadata' in formatted and not isinstance(formatted['metadata'], str):
-        formatted['metadata'] = json.dumps(formatted['metadata'])
+        formatted['metadata'] = json.dumps(formatted['metadata'], ensure_ascii=False, indent=2)
 
     return formatted
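The added ensure_ascii=False and indent=2 arguments change only how the JSON string is rendered, not its data: non-ASCII characters stay readable instead of being \u-escaped, and the output is pretty-printed. Standard-library behaviour, for example:

import json

data = {"msg": "你好", "ok": True}
print(json.dumps(data))                                # {"msg": "\u4f60\u597d", "ok": true}
print(json.dumps(data, ensure_ascii=False, indent=2))  # multi-line output, "你好" kept as-is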
xgae/utils/llm_client.py
CHANGED
@@ -19,8 +19,8 @@ class LLMConfig(TypedDict, total=False):
     api_base: str # Optional API base URL, Override .env LLM_API_BASE
     temperature: float # temperature: Optional Sampling temperature (0-1), Override .env LLM_TEMPERATURE
     max_tokens: int # max_tokens: Optional Maximum tokens in the response, Override .env LLM_MAX_TOKENS
-    stream: bool # stream: Optional whether to stream the response,
-    enable_thinking: bool # Optional whether to enable thinking,
+    stream: bool # stream: Optional whether to stream the response, Override .env LLM_STREAM
+    enable_thinking: bool # Optional whether to enable thinking, Override .env LLM_ENABLE_THINKING
     reasoning_effort: str # Optional level of reasoning effort, default is ‘low’
     response_format: str # response_format: Optional desired format for the response, default is None
     top_p: int # Optional Top-p sampling parameter, default is None
@@ -58,6 +58,8 @@ class LLMClient:
         env_llm_api_base = os.getenv("LLM_API_BASE", "https://dashscope.aliyuncs.com/compatible-mode/v1")
         env_llm_max_tokens = int(os.getenv("LLM_MAX_TOKENS", 16384))
         env_llm_temperature = float(os.getenv("LLM_TEMPERATURE", 0.7))
+        env_llm_stream = to_bool(os.getenv("LLM_STREAM", False))
+        env_llm_enable_thinking = to_bool(os.getenv("LLM_ENABLE_THINKING", False))
 
         llm_config_params = {
             "model": llm_config.get("model", env_llm_model),
@@ -67,8 +69,8 @@ class LLMClient:
             "api_base": llm_config.get("api_base", env_llm_api_base),
             "temperature": llm_config.get("temperature", env_llm_temperature),
             "max_tokens": llm_config.get("max_tokens", env_llm_max_tokens),
-            "stream": llm_config.get("stream",
-            "enable_thinking": llm_config.get("enable_thinking",
+            "stream": llm_config.get("stream", env_llm_stream),
+            "enable_thinking": llm_config.get("enable_thinking", env_llm_enable_thinking),
             "reasoning_effort": llm_config.get("reasoning_effort", 'low'),
             "response_format": llm_config.get("response_format", None),
             "top_p": llm_config.get("top_p", None),
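Each llm_config.get(key, env_default) call means an explicit LLMConfig value wins, and the new LLM_STREAM / LLM_ENABLE_THINKING environment defaults apply only when the key is absent. A small sketch of that precedence (values illustrative; the real code parses the env var with the package's to_bool helper):

import os

env_llm_stream = os.getenv("LLM_STREAM", "false").lower() == "true"   # environment default

llm_config = {"stream": True}                       # explicit override, e.g. LLMConfig(stream=True)
stream = llm_config.get("stream", env_llm_stream)   # -> True: the explicit value wins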
@@ -115,27 +117,27 @@ class LLMClient:
         # as it causes errors with inference profiles
         if model_name.startswith("bedrock/") and "claude-3-7" in model_name:
             prepared_llm_params.pop("max_tokens")
-            logging.debug(f"prepare_llm_params: Remove 'max_tokens' param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Remove 'max_tokens' param for model: {model_name}")
         else:
             is_openai_o_series = 'o1' in model_name
             is_openai_gpt5 = 'gpt-5' in model_name
             param_name = "max_completion_tokens" if (is_openai_o_series or is_openai_gpt5) else "max_tokens"
             if param_name == "max_completion_tokens":
                 prepared_llm_params[param_name] = max_tokens
-                logging.debug(f"prepare_llm_params: Add 'max_completion_tokens' param for model: {model_name}")
+                logging.debug(f"LLMClient prepare_llm_params: Add 'max_completion_tokens' param for model: {model_name}")
 
         # # Add Claude-specific headers
         if "claude" in model_name.lower() or "anthropic" in model_name.lower():
             prepared_llm_params["extra_headers"] = {
                 "anthropic-beta": "output-128k-2025-02-19"
             }
-            logging.debug(f"prepare_llm_params: Add 'extra_headers' param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Add 'extra_headers' param for model: {model_name}")
 
         # Add Bedrock-specific parameters
         if model_name.startswith("bedrock/"):
             if not model_id and "anthropic.claude-3-7-sonnet" in model_name:
                 prepared_llm_params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
-                logging.debug(f"prepare_llm_params: Must Set 'model_id' param for model: {model_name}")
+                logging.debug(f"LLMClient prepare_llm_params: Must Set 'model_id' param for model: {model_name}")
 
         # Apply Anthropic prompt caching (minimal implementation)
         effective_model_name = llm_config_params.get("model", model_name)
@@ -143,14 +145,14 @@ class LLMClient:
         # OpenAI GPT-5: drop unsupported temperature param (only default 1 allowed)
         if "gpt-5" in effective_model_name and "temperature" in llm_config_params and llm_config_params["temperature"] != 1:
             prepared_llm_params.pop("temperature", None)
-            logging.debug(f"prepare_llm_params: Remove 'temperature' param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Remove 'temperature' param for model: {model_name}")
 
         # OpenAI GPT-5: request priority service tier when calling OpenAI directly
         # Pass via both top-level and extra_body for LiteLLM compatibility
         if "gpt-5" in effective_model_name and not effective_model_name.startswith("openrouter/"):
             prepared_llm_params["service_tier"] = "priority"
             prepared_llm_params["extra_body"] = {"service_tier": "priority"}
-            logging.debug(f"prepare_llm_params: Add 'service_tier' and 'extra_body' param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Add 'service_tier' and 'extra_body' param for model: {model_name}")
 
         # Add reasoning_effort for Anthropic models if enabled
         enable_thinking = llm_config_params.get("enable_thinking")
@@ -163,14 +165,14 @@ class LLMClient:
             prepared_llm_params["provider"] = {
                 "order": ["together/fp8", "novita/fp8", "baseten/fp8", "moonshotai", "groq"]
             }
-            logging.debug(f"prepare_llm_params: Add 'provider' param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Add 'provider' param for model: {model_name}")
 
         reasoning_effort = llm_config_params.get("reasoning_effort")
         if is_anthropic and use_thinking:
             effort_level = reasoning_effort if reasoning_effort else 'low'
             prepared_llm_params["reasoning_effort"] = effort_level
             prepared_llm_params["temperature"] = 1.0 # Required by Anthropic when reasoning_effort is used
-            logging.debug(f"prepare_llm_params: Set 'temperature'=1.0 param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Set 'temperature'=1.0 param for model: {model_name}")
 
         return prepared_llm_params
 
@@ -204,7 +206,7 @@ class LLMClient:
                     {"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}
                 ]
                 cache_control_count += 1
-                logging.debug(f"prepare_complete_params: Add 'cache_control' in message content, for model: {model_name}")
+                logging.debug(f"LLMClient prepare_complete_params: Add 'cache_control' in message content, for model: {model_name}")
             elif isinstance(content, list):
                 for item in content:
                     if cache_control_count >= max_cache_control_blocks:
@@ -212,7 +214,7 @@ class LLMClient:
                     if isinstance(item, dict) and item.get("type") == "text" and "cache_control" not in item:
                         item["cache_control"] = {"type": "ephemeral"}
                         cache_control_count += 1
-                        logging.debug(f"prepare_complete_params: Add 'cache_control' in message content list, for model: {model_name}")
+                        logging.debug(f"LLMClient prepare_complete_params: Add 'cache_control' in message content list, for model: {model_name}")
 
         return complete_params
 
@@ -232,18 +234,18 @@ class LLMClient:
         last_error = None
         for attempt in range(self.max_retries):
             try:
-                logging.info(f"*** create_completion
+                logging.info(f"*** LLMClient create_completion: LLM '{self.model_name}' completion attempt {attempt + 1}/{self.max_retries}")
                 response = await litellm.acompletion(**complete_params)
                 return response
             except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
                 last_error = e
                 await self._handle_llm_error(e, attempt)
             except Exception as e:
-                logging.error(f"create_completion: Unexpected error during LLM completion: {str(e)}", exc_info=True)
-                raise LLMError(f"
+                logging.error(f"LLMClient create_completion: Unexpected error during LLM completion: {str(e)}", exc_info=True)
+                raise LLMError(f"LLMClient create completion failed: {e}")
 
-        logging.error(f"create_completion: LLM completion failed after {self.max_retries} attempts: {last_error}", exc_info=True)
-        raise LLMError(f"
+        logging.error(f"LLMClient create_completion: LLM completion failed after {self.max_retries} attempts: {last_error}", exc_info=True)
+        raise LLMError(f"LLMClient create completion failed after {self.max_retries} attempts !")
 
 if __name__ == "__main__":
     from xgae.utils.setup_env import setup_logging
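The retry loop in create_completion retries only the error types listed in the first except clause and re-raises anything else. A stripped-down sketch of the same control flow, not the package's implementation (it omits the backoff handled by _handle_llm_error, and max_retries is illustrative):

import json
import litellm
from openai import OpenAIError

async def complete_with_retry(complete_params: dict, max_retries: int = 3):
    last_error = None
    for attempt in range(max_retries):
        try:
            return await litellm.acompletion(**complete_params)
        except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
            last_error = e  # retryable error: remember it and try again
    raise RuntimeError(f"completion failed after {max_retries} attempts: {last_error}")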
xgae/utils/setup_env.py
CHANGED
@@ -60,6 +60,7 @@ def setup_env_logging():
     log_file = os.getenv("LOG_FILE", "log/xga.log")
     if log_enable :
         setup_logging(log_file, log_level)
+    setup_logging(log_file, log_level)
 
 def setup_langfuse() -> Langfuse:
     env_public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
xgae-0.1.12.dist-info/RECORD
ADDED
@@ -0,0 +1,21 @@
+xgae/__init__.py,sha256=OEUd9y9AoGBd3xYerdTTpz9xl4NWkmXeq1a2eil7Qro,72
+xgae/cli_app.py,sha256=vKuCIJw0gwXdtkT-QNCZKt2dE53thvTFwQr7nTgvaPY,3000
+xgae/engine/engine_base.py,sha256=-QZqLRbQdwRUfbY4l3i7dFfMB-BL267a-wGZR9bMPLc,1662
+xgae/engine/mcp_tool_box.py,sha256=Vyku8uOsTZ4ElnwEEgaw0hiltTOlC-FvtC9Ox-iJYck,10089
+xgae/engine/prompt_builder.py,sha256=X9bS7YIms6LYplCpNHeUmi74xFP5MwFXmXNqOt1Xz-Q,4356
+xgae/engine/task_engine.py,sha256=J4hVAkRgmd24larSFjxoCaVh9r3fDZJ95fUGB-FICZ8,21174
+xgae/engine/task_langfuse.py,sha256=b0aJ_Di-WDcYzi0TFCvcKWxkBz7PYP2jx3N52OptQMs,2349
+xgae/engine/responser/non_stream_responser.py,sha256=za1-7b37jVcg1cgTadNNXljqUTbzA92168i80xV7zdw,5589
+xgae/engine/responser/responser_base.py,sha256=WA2oKqP-UhQZj2es2nIFKf6_XkOhIfqZMUcQzDhtc6Q,30424
+xgae/engine/responser/stream_responser.py,sha256=dXcj-l3jb8J0orZ7THdf0sOjw9M7aZbfjHQC0NwQizo,7868
+xgae/tools/without_general_tools_app.py,sha256=cza3aLVh-090QABYA_DakoXmlFmc9rxwrXQsQwveT9A,3655
+xgae/utils/__init__.py,sha256=6lZCuEzMj66SW5sXyWrIuLH2W-1mHpbLpIBq_qbnsiw,337
+xgae/utils/json_helpers.py,sha256=ubp-dOCeROnZv7JHARRdmDIO5Npdwzrt8AWo3SMv0kI,4705
+xgae/utils/llm_client.py,sha256=6e3kzx73QN6z2SYMQQFmrmODj2Rk-GPJYIxBcFZhMQE,14361
+xgae/utils/misc.py,sha256=M8lMXYp1pHiY6Ee8ZTUG88GpOAsE5fbYoRO_hcBFUCE,953
+xgae/utils/setup_env.py,sha256=MqNG0c2QQBDFU1kI8frxr9kB5d08Mmi3QZ1OoorgIa0,2662
+xgae/utils/xml_tool_parser.py,sha256=I9xAZC_ElwBY19PNUq-WLXe9FSIJMeAv2Xs-VlajI7Y,4782
+xgae-0.1.12.dist-info/METADATA,sha256=7bOOMvn3Ntj_v_tZy2hkfiNKyTABLhoZRKDb0CJkwpM,310
+xgae-0.1.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+xgae-0.1.12.dist-info/entry_points.txt,sha256=vClvL_WBJyF2x3wJCz5CNJ_BJG-dWUh7h2YbAoskHsc,162
+xgae-0.1.12.dist-info/RECORD,,
xgae-0.1.10.dist-info/RECORD
DELETED
@@ -1,21 +0,0 @@
-xgae/__init__.py,sha256=OEUd9y9AoGBd3xYerdTTpz9xl4NWkmXeq1a2eil7Qro,72
-xgae/cli_app.py,sha256=ePis7gYYZrevEArnCQOhaN7z4C8Y5yJSOIov8z-lGBs,3157
-xgae/engine/engine_base.py,sha256=ioywuTpDMHEmyVcd6BInoU-vR70PhQStE2MVRWoEiJg,1768
-xgae/engine/mcp_tool_box.py,sha256=ZSCBSXRWhISwyZ1uEIbt3esjesM46g-ktv6CxvyPVDU,10030
-xgae/engine/prompt_builder.py,sha256=X9bS7YIms6LYplCpNHeUmi74xFP5MwFXmXNqOt1Xz-Q,4356
-xgae/engine/task_engine.py,sha256=LAo55FKmmO8Jbo5geEUYr8kFCaVigTb-Jm06XuYKYyY,19010
-xgae/engine/task_langfuse.py,sha256=b0aJ_Di-WDcYzi0TFCvcKWxkBz7PYP2jx3N52OptQMs,2349
-xgae/engine/responser/non_stream_responser.py,sha256=9YCCUedbotH-TPPbTh2Mv1qNVYvznHYFPgAnQB7NJSE,6510
-xgae/engine/responser/responser_base.py,sha256=8PcsvQHP68FEhu6v3dT9hDCc_rLKs38i4txWLcJD4ck,29851
-xgae/engine/responser/stream_responser.py,sha256=oPGtrT1nedGMjiBAwPzUlu6Z_rPWeVSODC1xQ6D8cTY,52055
-xgae/tools/without_general_tools_app.py,sha256=FGMV6njcOKwwfitc0j_nUov0RC-eWlhO1IP8_KHz1tQ,3788
-xgae/utils/__init__.py,sha256=_-TTNq5FanrA-jl_w3-4xp-BnRM7SLwfYQcFyvepcW0,332
-xgae/utils/json_helpers.py,sha256=6BkqiyEF3jV3Irb4Z6-wGY2_FNaLlxE1WKlMJHHT6E0,4645
-xgae/utils/llm_client.py,sha256=hvEDb4DBaWVQTXMjXOd6KrFwJFBcI-YXEQD4f_AhG7Q,14008
-xgae/utils/misc.py,sha256=M8lMXYp1pHiY6Ee8ZTUG88GpOAsE5fbYoRO_hcBFUCE,953
-xgae/utils/setup_env.py,sha256=HweQ-WAyxfV3KYjGYi-rRQAbI_SXoimduOLpQPbHfl8,2619
-xgae/utils/xml_tool_parser.py,sha256=I9xAZC_ElwBY19PNUq-WLXe9FSIJMeAv2Xs-VlajI7Y,4782
-xgae-0.1.10.dist-info/METADATA,sha256=SGnhZrr3DDZ600FDMep9ihERmhwFspEtUKRv1THqQsk,310
-xgae-0.1.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-xgae-0.1.10.dist-info/entry_points.txt,sha256=vClvL_WBJyF2x3wJCz5CNJ_BJG-dWUh7h2YbAoskHsc,162
-xgae-0.1.10.dist-info/RECORD,,
{xgae-0.1.10.dist-info → xgae-0.1.12.dist-info}/WHEEL
File without changes
{xgae-0.1.10.dist-info → xgae-0.1.12.dist-info}/entry_points.txt
File without changes