xgae 0.1.10__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of xgae as possibly problematic.
- xgae/cli_app.py +2 -4
- xgae/engine/engine_base.py +3 -3
- xgae/engine/mcp_tool_box.py +11 -6
- xgae/engine/responser/non_stream_responser.py +30 -39
- xgae/engine/responser/responser_base.py +52 -49
- xgae/engine/responser/stream_responser.py +93 -782
- xgae/engine/task_engine.py +77 -48
- xgae/utils/__init__.py +18 -6
- xgae/utils/json_helpers.py +2 -2
- xgae/utils/llm_client.py +21 -19
- xgae/utils/misc.py +1 -2
- xgae/utils/setup_env.py +1 -0
- {xgae-0.1.10.dist-info → xgae-0.1.13.dist-info}/METADATA +1 -1
- xgae-0.1.13.dist-info/RECORD +21 -0
- xgae-0.1.10.dist-info/RECORD +0 -21
- {xgae-0.1.10.dist-info → xgae-0.1.13.dist-info}/WHEEL +0 -0
- {xgae-0.1.10.dist-info → xgae-0.1.13.dist-info}/entry_points.txt +0 -0
xgae/engine/task_engine.py
CHANGED
@@ -5,7 +5,7 @@ import os
 from typing import List, Any, Dict, Optional, AsyncGenerator, Union, Literal
 from uuid import uuid4

-from xgae.utils import
+from xgae.utils import log_trace, to_bool
 from xgae.utils.llm_client import LLMClient, LLMConfig
 from xgae.utils.json_helpers import format_for_yield

@@ -46,6 +46,8 @@ class XGATaskEngine:

         max_auto_run = max_auto_run if max_auto_run else int(os.getenv("MAX_AUTO_RUN", 15))
         self.max_auto_run: int = 1 if max_auto_run <= 1 else max_auto_run
+
+        self.use_assistant_chunk_msg = to_bool(os.getenv("USE_ASSISTANT_CHUNK_MSG", False))
         self.tool_exec_parallel = True if tool_exec_parallel is None else tool_exec_parallel

         self.task_no = -1
@@ -67,7 +69,7 @@ class XGATaskEngine:
             chunks.append(chunk)

         if len(chunks) > 0:
-            final_result = self.
+            final_result = self.parse_final_result(chunks)
         else:
             final_result = XGATaskResult(type="error", content="LLM Answer is Empty")

@@ -123,16 +125,16 @@ class XGATaskEngine:


     async def _run_task_auto(self) -> AsyncGenerator[Dict[str, Any], None]:
-        def update_continuous_state(_auto_continue_count, _auto_continue):
-            continuous_state["auto_continue_count"] = _auto_continue_count
-            continuous_state["auto_continue"] = _auto_continue
-
         continuous_state: TaskRunContinuousState = {
             "accumulated_content": "",
             "auto_continue_count": 0,
             "auto_continue": False if self.max_auto_run <= 1 else True
         }

+        def update_continuous_state(_auto_continue_count, _auto_continue):
+            continuous_state["auto_continue_count"] = _auto_continue_count
+            continuous_state["auto_continue"] = _auto_continue
+
         auto_continue_count = 0
         auto_continue = True
         while auto_continue and auto_continue_count < self.max_auto_run:
@@ -146,38 +148,44 @@ class XGATaskEngine:
                         content = json.loads(chunk.get('content', '{}'))
                         status_type = content.get('status_type', None)
                         if status_type == "error":
-                            logging.error(f"run_task_auto: task_response error: {chunk.get('message', 'Unknown error')}")
+                            logging.error(f"TaskEngine run_task_auto: task_response error: {chunk.get('message', 'Unknown error')}")
                             auto_continue = False
                             break
                         elif status_type == 'finish':
                             finish_reason = content.get('finish_reason', None)
                             if finish_reason == 'completed':
-                                logging.info(f"run_task_auto: Detected finish_reason='completed', TASK_COMPLETE Success !")
+                                logging.info(f"TaskEngine run_task_auto: Detected finish_reason='completed', TASK_COMPLETE Success !")
                                 auto_continue = False
                                 break
                             elif finish_reason == 'xml_tool_limit_reached':
-                                logging.warning(f"run_task_auto: Detected finish_reason='xml_tool_limit_reached', stop auto-continue")
+                                logging.warning(f"TaskEngine run_task_auto: Detected finish_reason='xml_tool_limit_reached', stop auto-continue")
                                 auto_continue = False
                                 break
                             elif finish_reason == 'non_tool_call':
-                                logging.warning(f"run_task_auto: Detected finish_reason='non_tool_call', stop auto-continue")
+                                logging.warning(f"TaskEngine run_task_auto: Detected finish_reason='non_tool_call', stop auto-continue")
                                 auto_continue = False
                                 break
                             elif finish_reason == 'stop' or finish_reason == 'length': # 'length' never occur
                                 auto_continue = True
                                 auto_continue_count += 1
                                 update_continuous_state(auto_continue_count, auto_continue)
-                                logging.info(f"run_task_auto: Detected finish_reason='{finish_reason}', auto-continuing ({auto_continue_count}/{self.max_auto_run})")
+                                logging.info(f"TaskEngine run_task_auto: Detected finish_reason='{finish_reason}', auto-continuing ({auto_continue_count}/{self.max_auto_run})")
                     except Exception as parse_error:
-
+                        trace = log_trace(parse_error,f"TaskEngine run_task_auto: Parse chunk error, chunk: {chunk}")
+                        self.task_langfuse.root_span.event(name="engine_parse_chunk_error", level="ERROR",
+                            status_message=f"Task Engine parse chunk error: {parse_error}",
+                            metadata={"content": chunk, "trace": trace})
+
                         content = {"role": "system", "status_type": "error", "message": "Parse response chunk Error"}
-                        handle_error(parse_error)
                         error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
                         yield format_for_yield(error_msg)
             except Exception as run_error:
-
-
-
+                trace = log_trace(run_error, "TaskEngine run_task_auto: Call task_run_once")
+                self.task_langfuse.root_span.event(name="engine_task_run_once_error", level="ERROR",
+                    status_message=f"Call task_run_once error: {run_error}",
+                    metadata={"trace": trace})
+
+                content = {"role": "system", "status_type": "error", "message": "Call run_task_once error"}
                 error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
                 yield format_for_yield(error_msg)

@@ -198,6 +206,8 @@ class XGATaskEngine:
         auto_count = continuous_state.get("auto_continue_count")
         langfuse_metadata = self.task_langfuse.create_llm_langfuse_meta(auto_count)

+        self.task_langfuse.root_span.event(name="engine_start_create_completion", level="DEFAULT",
+            status_message=(f"Task Engine start create_completion llm_messages len={len(llm_messages)}"))
         llm_response = await self.llm_client.create_completion(llm_messages, langfuse_metadata)
         response_processor = self._create_response_processer()

@@ -205,11 +215,13 @@ class XGATaskEngine:
             self._logging_reponse_chunk(chunk, auto_count)
             yield chunk

-    def
+    def parse_final_result(self, chunks: List[Dict[str, Any]]) -> XGATaskResult:
         final_result: XGATaskResult = None
+        reverse_chunks = reversed(chunks)
+        chunk = None
         try:
             finish_reason = ''
-            for chunk in
+            for chunk in reverse_chunks:
                 chunk_type = chunk.get("type")
                 if chunk_type == "status":
                     status_content = json.loads(chunk.get('content', '{}'))
@@ -219,10 +231,7 @@ class XGATaskEngine:
                         final_result = XGATaskResult(type="error", content=error)
                     elif status_type == "finish":
                         finish_reason = status_content.get('finish_reason', None)
-
-                        error = "Completed due to over task max_auto_run limit !"
-                        final_result = XGATaskResult(type="error", content=error)
-                elif chunk_type == "tool" and finish_reason in ['completed', 'stop']:
+                elif chunk_type == "tool" and finish_reason in ['completed', 'stop', 'xml_tool_limit_reached']:
                     tool_content = json.loads(chunk.get('content', '{}'))
                     tool_execution = tool_content.get('tool_execution')
                     tool_name = tool_execution.get('function_name')
@@ -242,12 +251,12 @@ class XGATaskEngine:
                     result_type = "answer" if success else "error"
                     result_content = f"Task execute '{tool_name}' {result_type}: {output}"
                     final_result = XGATaskResult(type=result_type, content=result_content)
-                elif chunk_type == "
+                elif chunk_type == "assistant" and finish_reason == 'non_tool_call':
                     assis_content = chunk.get('content', {})
                     result_content = assis_content.get("content", "LLM output is empty")
                     final_result = XGATaskResult(type="answer", content=result_content)

-                if final_result
+                if final_result:
                     break

             if final_result and finish_reason == "completed":
@@ -258,14 +267,16 @@ class XGATaskEngine:
                 logging.warning(f"❌ FINAL_RESULT: LLM Result is EMPTY, finish_reason={finish_reason}")
                 final_result = XGATaskResult(type="error", content="LLM has no answer")
         except Exception as e:
-
+            trace = log_trace(e, f"TaskEngine parse_final_result: Parse message chunk error, chunk: {chunk}")
+            self.task_langfuse.root_span.event(name="engine_parse_final_result_error", level="ERROR",
+                status_message=f"Task Engine parse final result error: {e}",
+                metadata={"content": chunk, "trace": trace})
+
             final_result = XGATaskResult(type="error", content="Parse final result failed!")
-            handle_error(e)

         return final_result

-
-    def add_response_message(self, type: XGAResponseMsgType,
+    def create_response_message(self, type: XGAResponseMsgType,
                              content: Union[Dict[str, Any], List[Any], str],
                              is_llm_message: bool,
                              metadata: Optional[Dict[str, Any]]=None)-> XGAResponseMessage:
@@ -283,10 +294,17 @@ class XGATaskEngine:
             content = content,
             metadata = metadata
         )
-        self.task_response_msgs.append(message)

         return message

+    def add_response_message(self, type: XGAResponseMsgType,
+                             content: Union[Dict[str, Any], List[Any], str],
+                             is_llm_message: bool,
+                             metadata: Optional[Dict[str, Any]]=None)-> XGAResponseMessage:
+        message = self.create_response_message(type, content, is_llm_message, metadata)
+        self.task_response_msgs.append(message)
+        return message
+
     def get_history_llm_messages (self) -> List[Dict[str, Any]]:
         llm_messages = []
         for message in self.task_response_msgs:
@@ -296,14 +314,12 @@ class XGATaskEngine:
         response_llm_contents = []
         for llm_message in llm_messages:
             content = llm_message["content"]
-            # @todo content List type
             if isinstance(content, str):
                 try:
                     _content = json.loads(content)
                     response_llm_contents.append(_content)
                 except json.JSONDecodeError as e:
-
-                    handle_error(e)
+                    pass
             else:
                 response_llm_contents.append(content)

@@ -327,9 +343,11 @@ class XGATaskEngine:
             "task_no": self.task_no,
             "model_name": self.model_name,
             "max_xml_tool_calls": 0,
+            "use_assistant_chunk_msg": self.use_assistant_chunk_msg,
             "tool_execution_strategy": "parallel" if self.tool_exec_parallel else "sequential", # ,
             "xml_adding_strategy": "user_message",
             "add_response_msg_func": self.add_response_message,
+            "create_response_msg_func": self.create_response_message,
             "tool_box": self.tool_box,
             "task_langfuse": self.task_langfuse,
         }
@@ -340,21 +358,34 @@ class XGATaskEngine:
         return XGATaskLangFuse(self.session_id, self.task_id, self.task_run_id, self.task_no, self.agent_id)


-    def _logging_reponse_chunk(self, chunk, auto_count: int)
-
-
-
-
-
-
-
-
-
-
-
-
+    def _logging_reponse_chunk(self, chunk, auto_count: int)-> None:
+        try:
+            chunk_type = chunk.get('type', 'unknown')
+            prefix = ""
+            if chunk_type == 'status':
+                content = json.loads(chunk.get('content', '{}'))
+                status_type = content.get('status_type', "empty")
+                if status_type in ["tool_started", "tool_completed"]:
+                    return
+                prefix = "-" + status_type
+            elif chunk_type == 'tool':
+                tool_content = json.loads(chunk.get('content', '{}'))
+                tool_execution = tool_content.get('tool_execution')
+                tool_name = tool_execution.get('function_name')
+                prefix = "-" + tool_name
+
+            content = chunk.get('content', '')
+            pretty_content = content
+            if isinstance(content, dict):
+                pretty_content = json.dumps(content, ensure_ascii=False, indent=2)
+
+            if chunk_type == "assistant_chunk":
+                logging.debug(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}> content: {pretty_content}")
+            else:
+                logging.info(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}> content: {pretty_content}")
+        except Exception as e:
+            logging.error(f"TaskEngine logging_reponse_chunk: Decorate chunk={chunk}, error: {e}")

-        logging.info(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}>: {chunk}")


 if __name__ == "__main__":
@@ -371,9 +402,7 @@ if __name__ == "__main__":
     engine = XGATaskEngine(tool_box=tool_box,
                            general_tools=[],
                            custom_tools=["*"],
-                           llm_config=LLMConfig(stream=False),
                            system_prompt=system_prompt,
-                           max_auto_run=8,
                            session_id="session_1",
                            agent_id="agent_1",)

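For orientation, the new parse_final_result scans the collected response chunks newest-first, remembering the last finish_reason it sees, and returns the first chunk that qualifies as a usable result. The following is a minimal standalone sketch of that reverse-scan idea, not the package's actual implementation: the chunk shapes ("type", "content", "tool_execution", "function_name") follow the diff above, while the simplified result dict and the assumed "output" field are illustrative only.

import json
from typing import Any, Dict, List, Optional

def pick_final_result(chunks: List[Dict[str, Any]]) -> Optional[Dict[str, str]]:
    # Walk chunks from newest to oldest; status chunks update finish_reason,
    # tool/assistant chunks become the final result once a reason is known.
    finish_reason = ""
    for chunk in reversed(chunks):
        chunk_type = chunk.get("type")
        if chunk_type == "status":
            content = json.loads(chunk.get("content", "{}"))
            if content.get("status_type") == "finish":
                finish_reason = content.get("finish_reason", "")
        elif chunk_type == "tool" and finish_reason in ("completed", "stop", "xml_tool_limit_reached"):
            tool = json.loads(chunk.get("content", "{}")).get("tool_execution", {})
            # "output" is an assumed field name here; the real code derives a
            # success flag and formatted message from the tool execution record.
            return {"type": "answer", "content": str(tool.get("output", ""))}
        elif chunk_type == "assistant" and finish_reason == "non_tool_call":
            return {"type": "answer", "content": chunk.get("content", {}).get("content", "")}
    return None

The 0.1.13 version also splits message handling into create_response_message (build only) and add_response_message (build and append to task_response_msgs), so the streaming responser can construct chunk messages without polluting the task history.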
xgae/utils/__init__.py
CHANGED
@@ -1,15 +1,27 @@
 import logging

-
+
+def log_trace(exception: Exception, error: str=None) -> str:
+    import traceback
+
+    if error:
+        logging.error(f"{error} , error: {exception}")
+
+    trace_info = traceback.format_exc()
+    logging.error("Trace Details:\n%s", traceback.format_exc())
+
+    return trace_info
+
+
+def get_trace() -> str:
     import traceback

-
-    logging.error("Traceback details:\n%s", traceback.format_exc())
-    raise (e) from e
+    return traceback.format_exc()


-def to_bool(value:
+def to_bool(value: any) -> bool:
     if value is None:
         return False

-    return value.lower() == "true"
+    return str(value).lower() == "true"
+
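A short usage sketch of the two reworked helpers, assuming they are imported from xgae.utils as the new task_engine does. to_bool now coerces its argument through str() before comparing, so non-string values no longer raise, and log_trace returns the formatted traceback so callers can attach it to telemetry metadata.

import logging
from xgae.utils import log_trace, to_bool

# Only the string "true" (any case) is truthy; everything else is False.
assert to_bool("true") is True
assert to_bool("True") is True     # "True".lower() == "true"
assert to_bool(None) is False
assert to_bool(0) is False         # str(0) == "0", not "true"

try:
    int("not-a-number")
except Exception as exc:
    # Logs the message plus the full traceback, and returns the traceback text.
    trace_text = log_trace(exc, "demo: parsing failed")
    logging.info("captured %d characters of traceback", len(trace_text))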
xgae/utils/json_helpers.py
CHANGED
@@ -159,10 +159,10 @@ def format_for_yield(message_object: Dict[str, Any]) -> Dict[str, Any]:

     # Ensure content is a JSON string
     if 'content' in formatted and not isinstance(formatted['content'], str):
-        formatted['content'] = json.dumps(formatted['content'])
+        formatted['content'] = json.dumps(formatted['content'], ensure_ascii=False, indent=2)

     # Ensure metadata is a JSON string
     if 'metadata' in formatted and not isinstance(formatted['metadata'], str):
-        formatted['metadata'] = json.dumps(formatted['metadata'])
+        formatted['metadata'] = json.dumps(formatted['metadata'], ensure_ascii=False, indent=2)

     return formatted
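A quick illustration of what the ensure_ascii=False, indent=2 change does to the serialized content and metadata strings (plain standard-library behavior, shown with a made-up payload):

import json

payload = {"message": "任务完成", "ok": True}

# Before: compact output with non-ASCII escaped.
print(json.dumps(payload))
# {"message": "\u4efb\u52a1\u5b8c\u6210", "ok": true}

# After: UTF-8 text stays readable and the JSON is pretty-printed.
print(json.dumps(payload, ensure_ascii=False, indent=2))
# {
#   "message": "任务完成",
#   "ok": true
# }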
xgae/utils/llm_client.py
CHANGED
@@ -19,8 +19,8 @@ class LLMConfig(TypedDict, total=False):
     api_base: str # Optional API base URL, Override .env LLM_API_BASE
     temperature: float # temperature: Optional Sampling temperature (0-1), Override .env LLM_TEMPERATURE
     max_tokens: int # max_tokens: Optional Maximum tokens in the response, Override .env LLM_MAX_TOKENS
-    stream: bool # stream: Optional whether to stream the response,
-    enable_thinking: bool # Optional whether to enable thinking,
+    stream: bool # stream: Optional whether to stream the response, Override .env LLM_STREAM
+    enable_thinking: bool # Optional whether to enable thinking, Override .env LLM_ENABLE_THINKING
     reasoning_effort: str # Optional level of reasoning effort, default is ‘low’
     response_format: str # response_format: Optional desired format for the response, default is None
     top_p: int # Optional Top-p sampling parameter, default is None
@@ -58,6 +58,8 @@ class LLMClient:
         env_llm_api_base = os.getenv("LLM_API_BASE", "https://dashscope.aliyuncs.com/compatible-mode/v1")
         env_llm_max_tokens = int(os.getenv("LLM_MAX_TOKENS", 16384))
         env_llm_temperature = float(os.getenv("LLM_TEMPERATURE", 0.7))
+        env_llm_stream = to_bool(os.getenv("LLM_STREAM", False))
+        env_llm_enable_thinking = to_bool(os.getenv("LLM_ENABLE_THINKING", False))

         llm_config_params = {
             "model": llm_config.get("model", env_llm_model),
@@ -67,8 +69,8 @@ class LLMClient:
             "api_base": llm_config.get("api_base", env_llm_api_base),
             "temperature": llm_config.get("temperature", env_llm_temperature),
             "max_tokens": llm_config.get("max_tokens", env_llm_max_tokens),
-            "stream": llm_config.get("stream",
-            "enable_thinking": llm_config.get("enable_thinking",
+            "stream": llm_config.get("stream", env_llm_stream),
+            "enable_thinking": llm_config.get("enable_thinking", env_llm_enable_thinking),
             "reasoning_effort": llm_config.get("reasoning_effort", 'low'),
             "response_format": llm_config.get("response_format", None),
             "top_p": llm_config.get("top_p", None),
@@ -115,27 +117,27 @@ class LLMClient:
         # as it causes errors with inference profiles
         if model_name.startswith("bedrock/") and "claude-3-7" in model_name:
             prepared_llm_params.pop("max_tokens")
-            logging.debug(f"prepare_llm_params: Remove 'max_tokens' param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Remove 'max_tokens' param for model: {model_name}")
         else:
             is_openai_o_series = 'o1' in model_name
             is_openai_gpt5 = 'gpt-5' in model_name
             param_name = "max_completion_tokens" if (is_openai_o_series or is_openai_gpt5) else "max_tokens"
             if param_name == "max_completion_tokens":
                 prepared_llm_params[param_name] = max_tokens
-                logging.debug(f"prepare_llm_params: Add 'max_completion_tokens' param for model: {model_name}")
+                logging.debug(f"LLMClient prepare_llm_params: Add 'max_completion_tokens' param for model: {model_name}")

         # # Add Claude-specific headers
         if "claude" in model_name.lower() or "anthropic" in model_name.lower():
             prepared_llm_params["extra_headers"] = {
                 "anthropic-beta": "output-128k-2025-02-19"
             }
-            logging.debug(f"prepare_llm_params: Add 'extra_headers' param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Add 'extra_headers' param for model: {model_name}")

         # Add Bedrock-specific parameters
         if model_name.startswith("bedrock/"):
             if not model_id and "anthropic.claude-3-7-sonnet" in model_name:
                 prepared_llm_params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
-                logging.debug(f"prepare_llm_params: Must Set 'model_id' param for model: {model_name}")
+                logging.debug(f"LLMClient prepare_llm_params: Must Set 'model_id' param for model: {model_name}")

         # Apply Anthropic prompt caching (minimal implementation)
         effective_model_name = llm_config_params.get("model", model_name)
@@ -143,14 +145,14 @@ class LLMClient:
         # OpenAI GPT-5: drop unsupported temperature param (only default 1 allowed)
         if "gpt-5" in effective_model_name and "temperature" in llm_config_params and llm_config_params["temperature"] != 1:
             prepared_llm_params.pop("temperature", None)
-            logging.debug(f"prepare_llm_params: Remove 'temperature' param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Remove 'temperature' param for model: {model_name}")

         # OpenAI GPT-5: request priority service tier when calling OpenAI directly
         # Pass via both top-level and extra_body for LiteLLM compatibility
         if "gpt-5" in effective_model_name and not effective_model_name.startswith("openrouter/"):
             prepared_llm_params["service_tier"] = "priority"
             prepared_llm_params["extra_body"] = {"service_tier": "priority"}
-            logging.debug(f"prepare_llm_params: Add 'service_tier' and 'extra_body' param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Add 'service_tier' and 'extra_body' param for model: {model_name}")

         # Add reasoning_effort for Anthropic models if enabled
         enable_thinking = llm_config_params.get("enable_thinking")
@@ -163,14 +165,14 @@ class LLMClient:
             prepared_llm_params["provider"] = {
                 "order": ["together/fp8", "novita/fp8", "baseten/fp8", "moonshotai", "groq"]
             }
-            logging.debug(f"prepare_llm_params: Add 'provider' param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Add 'provider' param for model: {model_name}")

         reasoning_effort = llm_config_params.get("reasoning_effort")
         if is_anthropic and use_thinking:
             effort_level = reasoning_effort if reasoning_effort else 'low'
             prepared_llm_params["reasoning_effort"] = effort_level
             prepared_llm_params["temperature"] = 1.0 # Required by Anthropic when reasoning_effort is used
-            logging.debug(f"prepare_llm_params: Set 'temperature'=1.0 param for model: {model_name}")
+            logging.debug(f"LLMClient prepare_llm_params: Set 'temperature'=1.0 param for model: {model_name}")

         return prepared_llm_params

@@ -204,7 +206,7 @@ class LLMClient:
                     {"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}
                 ]
                 cache_control_count += 1
-                logging.debug(f"prepare_complete_params: Add 'cache_control' in message content, for model: {model_name}")
+                logging.debug(f"LLMClient prepare_complete_params: Add 'cache_control' in message content, for model: {model_name}")
             elif isinstance(content, list):
                 for item in content:
                     if cache_control_count >= max_cache_control_blocks:
@@ -212,7 +214,7 @@ class LLMClient:
                     if isinstance(item, dict) and item.get("type") == "text" and "cache_control" not in item:
                         item["cache_control"] = {"type": "ephemeral"}
                         cache_control_count += 1
-                        logging.debug(f"prepare_complete_params: Add 'cache_control' in message content list, for model: {model_name}")
+                        logging.debug(f"LLMClient prepare_complete_params: Add 'cache_control' in message content list, for model: {model_name}")

         return complete_params

@@ -232,18 +234,18 @@ class LLMClient:
         last_error = None
         for attempt in range(self.max_retries):
             try:
-                logging.info(f"*** create_completion
+                logging.info(f"*** LLMClient create_completion: LLM '{self.model_name}' completion attempt {attempt + 1}/{self.max_retries}")
                 response = await litellm.acompletion(**complete_params)
                 return response
             except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
                 last_error = e
                 await self._handle_llm_error(e, attempt)
             except Exception as e:
-                logging.error(f"create_completion: Unexpected error during LLM completion: {str(e)}", exc_info=True)
-                raise LLMError(f"
+                logging.error(f"LLMClient create_completion: Unexpected error during LLM completion: {str(e)}", exc_info=True)
+                raise LLMError(f"LLMClient create completion failed: {e}")

-        logging.error(f"create_completion: LLM completion failed after {self.max_retries} attempts: {last_error}", exc_info=True)
-        raise LLMError(f"
+        logging.error(f"LLMClient create_completion: LLM completion failed after {self.max_retries} attempts: {last_error}", exc_info=True)
+        raise LLMError(f"LLMClient create completion failed after {self.max_retries} attempts !")

 if __name__ == "__main__":
     from xgae.utils.setup_env import setup_logging
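The functional change here is that stream and enable_thinking now fall back to the LLM_STREAM and LLM_ENABLE_THINKING environment variables instead of hard-coded defaults, with a per-call LLMConfig entry still taking precedence. A minimal sketch of that resolution order, with to_bool inlined for self-containment and a hypothetical caller override:

import os

def to_bool(value) -> bool:
    # Same coercion as xgae.utils.to_bool: only the string "true" (any case) is truthy.
    if value is None:
        return False
    return str(value).lower() == "true"

# Environment-level defaults, mirroring the new LLM_STREAM / LLM_ENABLE_THINKING handling.
env_llm_stream = to_bool(os.getenv("LLM_STREAM", False))
env_llm_enable_thinking = to_bool(os.getenv("LLM_ENABLE_THINKING", False))

# A per-call config entry still overrides the environment default.
llm_config = {"stream": True}  # hypothetical caller override
params = {
    "stream": llm_config.get("stream", env_llm_stream),
    "enable_thinking": llm_config.get("enable_thinking", env_llm_enable_thinking),
}
print(params)  # {'stream': True, 'enable_thinking': False} when neither env var is set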
xgae/utils/misc.py
CHANGED
@@ -4,7 +4,6 @@ import sys

 from typing import Any, Dict

-from xgae.utils import handle_error

 def read_file(file_path: str) -> str:
     if not os.path.exists(file_path):
@@ -17,7 +16,7 @@ def read_file(file_path: str) -> str:
         return content
     except Exception as e:
         logging.error(f"Read file '{file_path}' failed")
-
+        raise

 def format_file_with_args(file_content:str, args: Dict[str, Any])-> str:
     from io import StringIO
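The handle_error helper is gone; read_file now logs the failure and re-raises. A minimal sketch of the pattern (not the package's exact read_file body, the open/encoding details are assumed):

import logging
import os

def read_file(file_path: str) -> str:
    # Log the failure, then re-raise the original exception instead of swallowing it.
    if not os.path.exists(file_path):
        raise FileNotFoundError(file_path)
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()
    except Exception:
        logging.error(f"Read file '{file_path}' failed")
        raise  # bare raise preserves the original traceback for the caller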
xgae/utils/setup_env.py
CHANGED
@@ -60,6 +60,7 @@ def setup_env_logging():
     log_file = os.getenv("LOG_FILE", "log/xga.log")
     if log_enable :
         setup_logging(log_file, log_level)
+    setup_logging(log_file, log_level)

 def setup_langfuse() -> Langfuse:
     env_public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
xgae-0.1.13.dist-info/RECORD
ADDED
@@ -0,0 +1,21 @@
+xgae/__init__.py,sha256=OEUd9y9AoGBd3xYerdTTpz9xl4NWkmXeq1a2eil7Qro,72
+xgae/cli_app.py,sha256=vKuCIJw0gwXdtkT-QNCZKt2dE53thvTFwQr7nTgvaPY,3000
+xgae/engine/engine_base.py,sha256=-QZqLRbQdwRUfbY4l3i7dFfMB-BL267a-wGZR9bMPLc,1662
+xgae/engine/mcp_tool_box.py,sha256=eE4qGxTHaSMMNDDWWafNXFT-vj_YYof4AjVSsxKoq68,10413
+xgae/engine/prompt_builder.py,sha256=X9bS7YIms6LYplCpNHeUmi74xFP5MwFXmXNqOt1Xz-Q,4356
+xgae/engine/task_engine.py,sha256=ZWxi292fceWvZuv501lwUGgb_PStktmfLFDzhDlsFfY,21011
+xgae/engine/task_langfuse.py,sha256=b0aJ_Di-WDcYzi0TFCvcKWxkBz7PYP2jx3N52OptQMs,2349
+xgae/engine/responser/non_stream_responser.py,sha256=RS2fIP_XCWjZEVtFRSNDJ9wM1N66MuzA66wXm3Nz1Jg,5583
+xgae/engine/responser/responser_base.py,sha256=WsUMUfEE2cexAg5LzXA1yUECOkbs1ekh8HbJS5-R7f8,30813
+xgae/engine/responser/stream_responser.py,sha256=O6_wSwdbqjYO-XowiLvHZKuw-F6fvxyjWULhfkkF6ow,7830
+xgae/tools/without_general_tools_app.py,sha256=FGMV6njcOKwwfitc0j_nUov0RC-eWlhO1IP8_KHz1tQ,3788
+xgae/utils/__init__.py,sha256=ElaGS-zdeZeu6is41u3Ny7lkvhg7BDSK-jMNg9j6K5A,499
+xgae/utils/json_helpers.py,sha256=ubp-dOCeROnZv7JHARRdmDIO5Npdwzrt8AWo3SMv0kI,4705
+xgae/utils/llm_client.py,sha256=6e3kzx73QN6z2SYMQQFmrmODj2Rk-GPJYIxBcFZhMQE,14361
+xgae/utils/misc.py,sha256=aMWOvJ9VW52q-L9Lkjl1hvXqLwpJAmyxA-Z8jzqFG0U,907
+xgae/utils/setup_env.py,sha256=MqNG0c2QQBDFU1kI8frxr9kB5d08Mmi3QZ1OoorgIa0,2662
+xgae/utils/xml_tool_parser.py,sha256=I9xAZC_ElwBY19PNUq-WLXe9FSIJMeAv2Xs-VlajI7Y,4782
+xgae-0.1.13.dist-info/METADATA,sha256=8y0v909gMEo6oyUYTrY5ZjRP8ACB8U-BPHZTDd5uq4M,310
+xgae-0.1.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+xgae-0.1.13.dist-info/entry_points.txt,sha256=vClvL_WBJyF2x3wJCz5CNJ_BJG-dWUh7h2YbAoskHsc,162
+xgae-0.1.13.dist-info/RECORD,,
xgae-0.1.10.dist-info/RECORD
DELETED
@@ -1,21 +0,0 @@
-xgae/__init__.py,sha256=OEUd9y9AoGBd3xYerdTTpz9xl4NWkmXeq1a2eil7Qro,72
-xgae/cli_app.py,sha256=ePis7gYYZrevEArnCQOhaN7z4C8Y5yJSOIov8z-lGBs,3157
-xgae/engine/engine_base.py,sha256=ioywuTpDMHEmyVcd6BInoU-vR70PhQStE2MVRWoEiJg,1768
-xgae/engine/mcp_tool_box.py,sha256=ZSCBSXRWhISwyZ1uEIbt3esjesM46g-ktv6CxvyPVDU,10030
-xgae/engine/prompt_builder.py,sha256=X9bS7YIms6LYplCpNHeUmi74xFP5MwFXmXNqOt1Xz-Q,4356
-xgae/engine/task_engine.py,sha256=LAo55FKmmO8Jbo5geEUYr8kFCaVigTb-Jm06XuYKYyY,19010
-xgae/engine/task_langfuse.py,sha256=b0aJ_Di-WDcYzi0TFCvcKWxkBz7PYP2jx3N52OptQMs,2349
-xgae/engine/responser/non_stream_responser.py,sha256=9YCCUedbotH-TPPbTh2Mv1qNVYvznHYFPgAnQB7NJSE,6510
-xgae/engine/responser/responser_base.py,sha256=8PcsvQHP68FEhu6v3dT9hDCc_rLKs38i4txWLcJD4ck,29851
-xgae/engine/responser/stream_responser.py,sha256=oPGtrT1nedGMjiBAwPzUlu6Z_rPWeVSODC1xQ6D8cTY,52055
-xgae/tools/without_general_tools_app.py,sha256=FGMV6njcOKwwfitc0j_nUov0RC-eWlhO1IP8_KHz1tQ,3788
-xgae/utils/__init__.py,sha256=_-TTNq5FanrA-jl_w3-4xp-BnRM7SLwfYQcFyvepcW0,332
-xgae/utils/json_helpers.py,sha256=6BkqiyEF3jV3Irb4Z6-wGY2_FNaLlxE1WKlMJHHT6E0,4645
-xgae/utils/llm_client.py,sha256=hvEDb4DBaWVQTXMjXOd6KrFwJFBcI-YXEQD4f_AhG7Q,14008
-xgae/utils/misc.py,sha256=M8lMXYp1pHiY6Ee8ZTUG88GpOAsE5fbYoRO_hcBFUCE,953
-xgae/utils/setup_env.py,sha256=HweQ-WAyxfV3KYjGYi-rRQAbI_SXoimduOLpQPbHfl8,2619
-xgae/utils/xml_tool_parser.py,sha256=I9xAZC_ElwBY19PNUq-WLXe9FSIJMeAv2Xs-VlajI7Y,4782
-xgae-0.1.10.dist-info/METADATA,sha256=SGnhZrr3DDZ600FDMep9ihERmhwFspEtUKRv1THqQsk,310
-xgae-0.1.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-xgae-0.1.10.dist-info/entry_points.txt,sha256=vClvL_WBJyF2x3wJCz5CNJ_BJG-dWUh7h2YbAoskHsc,162
-xgae-0.1.10.dist-info/RECORD,,
{xgae-0.1.10.dist-info → xgae-0.1.13.dist-info}/WHEEL
File without changes

{xgae-0.1.10.dist-info → xgae-0.1.13.dist-info}/entry_points.txt
File without changes