xgae 0.1.9__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -5,7 +5,7 @@ import os
  from typing import List, Any, Dict, Optional, AsyncGenerator, Union, Literal
  from uuid import uuid4

- from xgae.utils import handle_error
+ from xgae.utils import handle_error, to_bool
  from xgae.utils.llm_client import LLMClient, LLMConfig
  from xgae.utils.json_helpers import format_for_yield

@@ -46,6 +46,8 @@ class XGATaskEngine:

  max_auto_run = max_auto_run if max_auto_run else int(os.getenv("MAX_AUTO_RUN", 15))
  self.max_auto_run: int = 1 if max_auto_run <= 1 else max_auto_run
+
+ self.use_assistant_chunk_msg = to_bool(os.getenv("USE_ASSISTANT_CHUNK_MSG", False))
  self.tool_exec_parallel = True if tool_exec_parallel is None else tool_exec_parallel

  self.task_no = -1
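For context, a minimal sketch of how this env-driven flag resolves (relying on the `to_bool` change shown later in this diff; the values and variable name mirror the hunk above):

import os
from xgae.utils import to_bool  # broadened in this release to accept non-string values

# Variable unset: os.getenv returns the non-string default False,
# which the updated to_bool coerces to "false" -> False.
print(to_bool(os.getenv("USE_ASSISTANT_CHUNK_MSG", False)))  # False

# Exported as USE_ASSISTANT_CHUNK_MSG=true (or True): parsed case-insensitively.
os.environ["USE_ASSISTANT_CHUNK_MSG"] = "True"
print(to_bool(os.getenv("USE_ASSISTANT_CHUNK_MSG", False)))  # True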
@@ -67,7 +69,7 @@ class XGATaskEngine:
  chunks.append(chunk)

  if len(chunks) > 0:
- final_result = self._parse_final_result(chunks)
+ final_result = self.parse_final_result(chunks)
  else:
  final_result = XGATaskResult(type="error", content="LLM Answer is Empty")

@@ -117,22 +119,22 @@ class XGATaskEngine:

  self.task_prompt = self.prompt_builder.build_task_prompt(self.model_name, general_tool_schemas, custom_tool_schemas)

- logging.info("*" * 30 + f" XGATaskEngine Task'{self.task_id}' Initialized " + "*" * 30)
+ logging.info("*" * 10 + f" XGATaskEngine Task'{self.task_id}' Initialized " + "*" * 10)
  logging.info(f"model_name={self.model_name}, is_stream={self.is_stream}")
  logging.info(f"general_tools={general_tools}, custom_tools={custom_tools}")


  async def _run_task_auto(self) -> AsyncGenerator[Dict[str, Any], None]:
- def update_continuous_state(_auto_continue_count, _auto_continue):
- continuous_state["auto_continue_count"] = _auto_continue_count
- continuous_state["auto_continue"] = _auto_continue
-
  continuous_state: TaskRunContinuousState = {
  "accumulated_content": "",
  "auto_continue_count": 0,
  "auto_continue": False if self.max_auto_run <= 1 else True
  }

+ def update_continuous_state(_auto_continue_count, _auto_continue):
+ continuous_state["auto_continue_count"] = _auto_continue_count
+ continuous_state["auto_continue"] = _auto_continue
+
  auto_continue_count = 0
  auto_continue = True
  while auto_continue and auto_continue_count < self.max_auto_run:
@@ -146,34 +148,45 @@ class XGATaskEngine:
  content = json.loads(chunk.get('content', '{}'))
  status_type = content.get('status_type', None)
  if status_type == "error":
- logging.error(f"run_task_auto: task_response error: {chunk.get('message', 'Unknown error')}")
+ logging.error(f"TaskEngine run_task_auto: task_response error: {chunk.get('message', 'Unknown error')}")
  auto_continue = False
  break
  elif status_type == 'finish':
  finish_reason = content.get('finish_reason', None)
  if finish_reason == 'completed':
- logging.info(f"run_task_auto: Detected finish_reason='completed', TASK_COMPLETE Success !")
+ logging.info(f"TaskEngine run_task_auto: Detected finish_reason='completed', TASK_COMPLETE Success !")
  auto_continue = False
  break
  elif finish_reason == 'xml_tool_limit_reached':
- logging.warning(f"run_task_auto: Detected finish_reason='xml_tool_limit_reached', stop auto-continue")
+ logging.warning(f"TaskEngine run_task_auto: Detected finish_reason='xml_tool_limit_reached', stop auto-continue")
+ auto_continue = False
+ break
+ elif finish_reason == 'non_tool_call':
+ logging.warning(f"TaskEngine run_task_auto: Detected finish_reason='non_tool_call', stop auto-continue")
  auto_continue = False
  break
  elif finish_reason == 'stop' or finish_reason == 'length': # 'length' never occur
  auto_continue = True
  auto_continue_count += 1
  update_continuous_state(auto_continue_count, auto_continue)
- logging.info(f"run_task_auto: Detected finish_reason='{finish_reason}', auto-continuing ({auto_continue_count}/{self.max_auto_run})")
+ logging.info(f"TaskEngine run_task_auto: Detected finish_reason='{finish_reason}', auto-continuing ({auto_continue_count}/{self.max_auto_run})")
  except Exception as parse_error:
- logging.error(f"run_task_auto: Error in parse chunk: {str(parse_error)}")
- content = {"role": "system", "status_type": "error", "message": "Parse response chunk Error"}
+ logging.error(f"TaskEngine run_task_auto: Parse chunk error, chunk: {chunk}")
  handle_error(parse_error)
+ self.task_langfuse.root_span.event(name="engine_parse_chunk_error", level="ERROR",
+ status_message=(f"Task Engine parse chunk error: {parse_error}"),
+ metadata={"content": chunk})
+
+ content = {"role": "system", "status_type": "error", "message": "Parse response chunk Error"}
  error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
  yield format_for_yield(error_msg)
  except Exception as run_error:
- logging.error(f"run_task_auto: Call task_run_once error: {str(run_error)}")
- content = {"role": "system", "status_type": "error", "message": "Call task_run_once error"}
+ logging.error(f"TaskEngine run_task_auto: Call task_run_once error: {run_error}")
  handle_error(run_error)
+ self.task_langfuse.root_span.event(name="engine_task_run_once_error", level="ERROR",
+ status_message=(f"Call task_run_once error: {run_error}"))
+
+ content = {"role": "system", "status_type": "error", "message": "Call run_task_once error"}
  error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
  yield format_for_yield(error_msg)

@@ -191,21 +204,25 @@ class XGATaskEngine:
  }
  llm_messages.append(temp_assistant_message)

- llm_count = continuous_state.get("auto_continue_count")
- langfuse_metadata = self.task_langfuse.create_llm_langfuse_meta(llm_count)
+ auto_count = continuous_state.get("auto_continue_count")
+ langfuse_metadata = self.task_langfuse.create_llm_langfuse_meta(auto_count)

+ self.task_langfuse.root_span.event(name="engine_start_create_completion", level="DEFAULT",
+ status_message=(f"Task Engine start create_completion llm_messages len={len(llm_messages)}"))
  llm_response = await self.llm_client.create_completion(llm_messages, langfuse_metadata)
  response_processor = self._create_response_processer()

  async for chunk in response_processor.process_response(llm_response, llm_messages, continuous_state):
- self._logging_reponse_chunk(chunk)
+ self._logging_reponse_chunk(chunk, auto_count)
  yield chunk

- def _parse_final_result(self, chunks: List[Dict[str, Any]]) -> XGATaskResult:
+ def parse_final_result(self, chunks: List[Dict[str, Any]]) -> XGATaskResult:
  final_result: XGATaskResult = None
+ reverse_chunks = reversed(chunks)
+ chunk = None
  try:
  finish_reason = ''
- for chunk in reversed(chunks):
+ for chunk in reverse_chunks:
  chunk_type = chunk.get("type")
  if chunk_type == "status":
  status_content = json.loads(chunk.get('content', '{}'))
@@ -215,10 +232,7 @@ class XGATaskEngine:
  final_result = XGATaskResult(type="error", content=error)
  elif status_type == "finish":
  finish_reason = status_content.get('finish_reason', None)
- if finish_reason == 'xml_tool_limit_reached':
- error = "Completed due to over task max_auto_run limit !"
- final_result = XGATaskResult(type="error", content=error)
- elif chunk_type == "tool" and finish_reason in ['completed', 'stop']:
+ elif chunk_type == "tool" and finish_reason in ['completed', 'stop', 'xml_tool_limit_reached']:
  tool_content = json.loads(chunk.get('content', '{}'))
  tool_execution = tool_content.get('tool_execution')
  tool_name = tool_execution.get('function_name')
@@ -238,22 +252,33 @@ class XGATaskEngine:
  result_type = "answer" if success else "error"
  result_content = f"Task execute '{tool_name}' {result_type}: {output}"
  final_result = XGATaskResult(type=result_type, content=result_content)
- elif chunk_type == "assistant_complete" and finish_reason == 'stop':
+ elif chunk_type == "assistant" and finish_reason == 'non_tool_call':
  assis_content = chunk.get('content', {})
  result_content = assis_content.get("content", "LLM output is empty")
  final_result = XGATaskResult(type="answer", content=result_content)

- if final_result is not None:
+ if final_result:
  break
+
+ if final_result and finish_reason == "completed":
+ logging.info(f"✅ FINAL_RESULT: finish_reason={finish_reason}, final_result={final_result}")
+ elif final_result is not None:
+ logging.warning(f"⚠️ FINAL_RESULT: finish_reason={finish_reason}, final_result={final_result}")
+ else:
+ logging.warning(f"❌ FINAL_RESULT: LLM Result is EMPTY, finish_reason={finish_reason}")
+ final_result = XGATaskResult(type="error", content="LLM has no answer")
  except Exception as e:
- logging.error(f"parse_final_result: Final result pass error: {str(e)}")
- final_result = XGATaskResult(type="error", content="Parse final result failed!")
+ logging.error(f"TaskEngine parse_final_result: Parse message chunk error, chunk: {chunk}")
  handle_error(e)
+ self.task_langfuse.root_span.event(name="engine_parse_final_result_error", level="ERROR",
+ status_message=(f"Task Engine parse final result error: {e}"),
+ metadata={"content": chunk})

- return final_result
+ final_result = XGATaskResult(type="error", content="Parse final result failed!")

+ return final_result

- def add_response_message(self, type: XGAResponseMsgType,
+ def create_response_message(self, type: XGAResponseMsgType,
  content: Union[Dict[str, Any], List[Any], str],
  is_llm_message: bool,
  metadata: Optional[Dict[str, Any]]=None)-> XGAResponseMessage:
@@ -271,10 +296,17 @@ class XGATaskEngine:
  content = content,
  metadata = metadata
  )
- self.task_response_msgs.append(message)

  return message

+ def add_response_message(self, type: XGAResponseMsgType,
+ content: Union[Dict[str, Any], List[Any], str],
+ is_llm_message: bool,
+ metadata: Optional[Dict[str, Any]]=None)-> XGAResponseMessage:
+ message = self.create_response_message(type, content, is_llm_message, metadata)
+ self.task_response_msgs.append(message)
+ return message
+
  def get_history_llm_messages (self) -> List[Dict[str, Any]]:
  llm_messages = []
  for message in self.task_response_msgs:
@@ -284,13 +316,12 @@ class XGATaskEngine:
  response_llm_contents = []
  for llm_message in llm_messages:
  content = llm_message["content"]
- # @todo content List type
  if isinstance(content, str):
  try:
  _content = json.loads(content)
  response_llm_contents.append(_content)
  except json.JSONDecodeError as e:
- logging.error(f"get_context_llm_contents: Failed to decode json, content=:{content}")
+ logging.error(f"TaskEngine get_history_llm_messages: Failed to decode json, content: {content}")
  handle_error(e)
  else:
  response_llm_contents.append(content)
@@ -315,9 +346,11 @@ class XGATaskEngine:
  "task_no": self.task_no,
  "model_name": self.model_name,
  "max_xml_tool_calls": 0,
+ "use_assistant_chunk_msg": self.use_assistant_chunk_msg,
  "tool_execution_strategy": "parallel" if self.tool_exec_parallel else "sequential", # ,
  "xml_adding_strategy": "user_message",
  "add_response_msg_func": self.add_response_message,
+ "create_response_msg_func": self.create_response_message,
  "tool_box": self.tool_box,
  "task_langfuse": self.task_langfuse,
  }
@@ -328,37 +361,52 @@ class XGATaskEngine:
  return XGATaskLangFuse(self.session_id, self.task_id, self.task_run_id, self.task_no, self.agent_id)


- def _logging_reponse_chunk(self, chunk):
- chunk_type = chunk.get('type')
- prefix = ""
-
- if chunk_type == 'status':
- content = json.loads(chunk.get('content', '{}'))
- status_type = content.get('status_type', "empty")
- prefix = "-" + status_type
- elif chunk_type == 'tool':
- tool_content = json.loads(chunk.get('content', '{}'))
- tool_execution = tool_content.get('tool_execution')
- tool_name = tool_execution.get('function_name')
- prefix = "-" + tool_name
+ def _logging_reponse_chunk(self, chunk, auto_count: int)-> None:
+ try:
+ chunk_type = chunk.get('type', 'unknown')
+ prefix = ""
+ if chunk_type == 'status':
+ content = json.loads(chunk.get('content', '{}'))
+ status_type = content.get('status_type', "empty")
+ if status_type in ["tool_started", "tool_completed"]:
+ return
+ prefix = "-" + status_type
+ elif chunk_type == 'tool':
+ tool_content = json.loads(chunk.get('content', '{}'))
+ tool_execution = tool_content.get('tool_execution')
+ tool_name = tool_execution.get('function_name')
+ prefix = "-" + tool_name
+
+ content = chunk.get('content', '')
+ pretty_content = content
+ if isinstance(content, dict):
+ pretty_content = json.dumps(content, ensure_ascii=False, indent=2)
+
+ if chunk_type == "assistant_chunk":
+ logging.debug(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}> content: {pretty_content}")
+ else:
+ logging.info(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}> content: {pretty_content}")

- logging.info(f"TASK_RESP_CHUNK[{chunk_type}{prefix}]: {chunk}")
+ except Exception as e:
+ logging.error(f"TaskEngine logging_reponse_chunk: Decorate chunk log error, chunk: {chunk}")
+ handle_error(e)


  if __name__ == "__main__":
  import asyncio
  from xgae.utils.misc import read_file
+ from xgae.utils.setup_env import setup_logging
+
+ setup_logging()

  async def main():
  # Before Run Exec: uv run custom_fault_tools
  tool_box = XGAMcpToolBox(custom_mcp_server_file="mcpservers/custom_servers.json")
- system_prompt = read_file("templates/example_user_prompt.txt")
+ system_prompt = read_file("templates/example/fault_user_prompt.txt")
  engine = XGATaskEngine(tool_box=tool_box,
  general_tools=[],
  custom_tools=["*"],
- llm_config=LLMConfig(stream=False),
  system_prompt=system_prompt,
- max_auto_run=8,
  session_id="session_1",
  agent_id="agent_1",)

@@ -2,20 +2,22 @@
  from typing import Any, Dict, Optional
  from langfuse import Langfuse

- from xgae.utils.setup_env import setup_langfuse, setup_env_logging
+ from xgae.utils.setup_env import setup_langfuse
  from xgae.utils.llm_client import LangfuseMetadata
  from xgae.engine.engine_base import XGATaskResult

- setup_env_logging()
- langfuse:Langfuse = setup_langfuse()
-
  class XGATaskLangFuse:
+ langfuse: Langfuse = None
+
  def __init__(self,
  session_id: str,
  task_id:str,
  task_run_id: str,
  task_no: int,
  agent_id: str) -> None:
+ if XGATaskLangFuse.langfuse is None:
+ XGATaskLangFuse.langfuse = setup_langfuse()
+
  self.session_id = session_id
  self.task_id = task_id
  self.task_run_id = task_run_id
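The import-time `setup_env_logging()` / `setup_langfuse()` calls move into a lazily initialized class attribute, so importing the module no longer configures Langfuse. A reduced sketch of that pattern, with the Langfuse setup stubbed out (the stub is an assumption, not xgae code):

class _FakeLangfuse:
    """Stand-in for the Langfuse client, used only for this sketch."""
    def trace(self, **kwargs):
        return kwargs

def setup_langfuse() -> _FakeLangfuse:
    print("langfuse configured")  # the real helper reads LANGFUSE_* env vars
    return _FakeLangfuse()

class XGATaskLangFuse:
    langfuse = None  # shared client, created on first use instead of at import time

    def __init__(self, session_id: str) -> None:
        if XGATaskLangFuse.langfuse is None:  # only the first instance pays the setup cost
            XGATaskLangFuse.langfuse = setup_langfuse()
        self.session_id = session_id

XGATaskLangFuse("s1")
XGATaskLangFuse("s2")  # "langfuse configured" is printed only once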
@@ -35,9 +37,9 @@ class XGATaskLangFuse:
  trace = None
  if trace_id:
  self.trace_id = trace_id
- trace = langfuse.trace(id=trace_id)
+ trace = XGATaskLangFuse.langfuse.trace(id=trace_id)
  else:
- trace = langfuse.trace(name="xga_task_engine")
+ trace = XGATaskLangFuse.langfuse.trace(name="xga_task_engine")
  self.trace_id = trace.id

  metadata = {"task_id": self.task_id, "session_id": self.session_id, "agent_id": self.agent_id}
@@ -3,7 +3,6 @@ from pydantic import Field

  from mcp.server.fastmcp import FastMCP

- from xgae.engine.engine_base import XGAToolResult

  mcp = FastMCP(name="XGAE Message Tools")

@@ -17,7 +16,7 @@ async def complete(task_id: str,
  description="Comma-separated list of final outputs. Use when: 1) Completion relates to files 2) User needs to review outputs 3) Deliverables in files")]
  ):
  print(f"<XGAETools-complete>: task_id={task_id}, text={text}, attachments={attachments}")
- return XGAToolResult(success=True, output=str({"status": "complete"}))
+ return {"status": "complete"}


  @mcp.tool(
@@ -30,7 +29,7 @@ async def ask(task_id: str,
  description="Comma-separated list of files/URLs to attach. Use when: 1) Question relates to files/configs 2) User needs to review content 3) Options documented in files 4) Supporting evidence needed")]
  ):
  print(f"<XGAETools-ask>: task_id={task_id}, text={text}, attachments={attachments}")
- return XGAToolResult(success=True, output=str({"status": "Awaiting user response..."}))
+ return {"status": "Awaiting user response..."}

  @mcp.tool(
  description="end task, destroy sandbox"
xgae/utils/__init__.py CHANGED
@@ -8,8 +8,8 @@ def handle_error(e: Exception) -> None:
  raise (e) from e


- def to_bool(value: str) -> bool:
+ def to_bool(value: any) -> bool:
  if value is None:
  return False

- return True if value.lower() == "true" else False
+ return str(value).lower() == "true"
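The helper no longer assumes a string argument, which matters because callers such as `os.getenv("LLM_STREAM", False)` hand it a non-string default when the variable is unset. A quick before/after comparison of the observable behaviour:

def to_bool_old(value: str) -> bool:
    if value is None:
        return False
    return True if value.lower() == "true" else False

def to_bool_new(value) -> bool:
    if value is None:
        return False
    return str(value).lower() == "true"

print(to_bool_new("True"))  # True  (case-insensitive, same as before)
print(to_bool_new(True))    # True  (str(True).lower() == "true")
print(to_bool_new(False))   # False (to_bool_old(False) would raise AttributeError)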
@@ -159,10 +159,10 @@ def format_for_yield(message_object: Dict[str, Any]) -> Dict[str, Any]:

  # Ensure content is a JSON string
  if 'content' in formatted and not isinstance(formatted['content'], str):
- formatted['content'] = json.dumps(formatted['content'])
+ formatted['content'] = json.dumps(formatted['content'], ensure_ascii=False, indent=2)

  # Ensure metadata is a JSON string
  if 'metadata' in formatted and not isinstance(formatted['metadata'], str):
- formatted['metadata'] = json.dumps(formatted['metadata'])
+ formatted['metadata'] = json.dumps(formatted['metadata'], ensure_ascii=False, indent=2)

  return formatted
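The effect of the new `json.dumps` arguments in isolation (the sample payload is illustrative, not taken from xgae):

import json

content = {"status_type": "finish", "message": "任务完成"}

# Old behaviour: compact, non-ASCII escaped.
print(json.dumps(content))
# {"status_type": "finish", "message": "\u4efb\u52a1\u5b8c\u6210"}

# New behaviour: pretty-printed, non-ASCII kept readable.
print(json.dumps(content, ensure_ascii=False, indent=2))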
xgae/utils/llm_client.py CHANGED
@@ -8,6 +8,7 @@ from typing import Union, Dict, Any, Optional, List, TypedDict
  from openai import OpenAIError
  from litellm.utils import ModelResponse, CustomStreamWrapper

+ from xgae.utils import to_bool
  from xgae.utils.setup_env import setup_langfuse

  class LLMConfig(TypedDict, total=False):
@@ -18,8 +19,8 @@ class LLMConfig(TypedDict, total=False):
  api_base: str # Optional API base URL, Override .env LLM_API_BASE
  temperature: float # temperature: Optional Sampling temperature (0-1), Override .env LLM_TEMPERATURE
  max_tokens: int # max_tokens: Optional Maximum tokens in the response, Override .env LLM_MAX_TOKENS
- stream: bool # stream: Optional whether to stream the response, default is True
- enable_thinking: bool # Optional whether to enable thinking, default is False
+ stream: bool # stream: Optional whether to stream the response, Override .env LLM_STREAM
+ enable_thinking: bool # Optional whether to enable thinking, Override .env LLM_ENABLE_THINKING
  reasoning_effort: str # Optional level of reasoning effort, default is ‘low’
  response_format: str # response_format: Optional desired format for the response, default is None
  top_p: int # Optional Top-p sampling parameter, default is None
@@ -46,6 +47,7 @@ class LLMClient:
  def __init__(self, llm_config: LLMConfig=None):
  litellm.modify_params = True
  litellm.drop_params = True
+
  self._init_langfuse()

  llm_config = llm_config or LLMConfig()
@@ -56,6 +58,8 @@ class LLMClient:
  env_llm_api_base = os.getenv("LLM_API_BASE", "https://dashscope.aliyuncs.com/compatible-mode/v1")
  env_llm_max_tokens = int(os.getenv("LLM_MAX_TOKENS", 16384))
  env_llm_temperature = float(os.getenv("LLM_TEMPERATURE", 0.7))
+ env_llm_stream = to_bool(os.getenv("LLM_STREAM", False))
+ env_llm_enable_thinking = to_bool(os.getenv("LLM_ENABLE_THINKING", False))

  llm_config_params = {
  "model": llm_config.get("model", env_llm_model),
@@ -65,8 +69,8 @@ class LLMClient:
  "api_base": llm_config.get("api_base", env_llm_api_base),
  "temperature": llm_config.get("temperature", env_llm_temperature),
  "max_tokens": llm_config.get("max_tokens", env_llm_max_tokens),
- "stream": llm_config.get("stream", True),
- "enable_thinking": llm_config.get("enable_thinking", False),
+ "stream": llm_config.get("stream", env_llm_stream),
+ "enable_thinking": llm_config.get("enable_thinking", env_llm_enable_thinking),
  "reasoning_effort": llm_config.get("reasoning_effort", 'low'),
  "response_format": llm_config.get("response_format", None),
  "top_p": llm_config.get("top_p", None),
@@ -78,22 +82,27 @@ class LLMClient:
  self.is_stream = llm_config_params['stream']

  self.lite_llm_params = self._prepare_llm_params(llm_config_params)
- logging.info(f"📡 LLMClient initialed : model={self.model_name}, is_stream={self.is_stream}, enable thinking={self.lite_llm_params['enable_thinking']}")
+ logging.info(f"=== LLMClient initialed : model={self.model_name}, is_stream={self.is_stream}, enable thinking={self.lite_llm_params['enable_thinking']}")

  @staticmethod
  def _init_langfuse():
  if not LLMClient.langfuse_inited:
  LLMClient.langfuse_inited =True
- env_langfuse = setup_langfuse()
- if env_langfuse and env_langfuse.enabled:
- litellm.success_callback = ["langfuse"]
- litellm.failure_callback = ["langfuse"]
- LLMClient.langfuse_enabled = True
- logging.info("=== LiteLLM Langfuse is enable !")
+
+ env_llm_langfuse_enable = to_bool(os.getenv("LLM_LANGFUSE_ENABLE", False))
+ if env_llm_langfuse_enable:
+ env_langfuse = setup_langfuse()
+ if env_langfuse and env_langfuse.enabled:
+ litellm.success_callback = ["langfuse"]
+ litellm.failure_callback = ["langfuse"]
+ LLMClient.langfuse_enabled = True
+ logging.info("🛠️ LiteLLM Langfuse is enable !")
+ else:
+ LLMClient.langfuse_enabled = False
+ logging.warning("🛠️ LiteLLM Langfuse is disable, langfuse.enabled=false !")
  else:
  LLMClient.langfuse_enabled = False
- logging.warning("*** LiteLLM Langfuse is disable !")
-
+ logging.warning("🛠️ LiteLLM Langfuse is disable, LLM_LANGFUSE_ENABLE=False !")

  def _prepare_llm_params(self, llm_config_params: Dict[str, Any]) -> Dict[str, Any]:
  prepared_llm_params = llm_config_params.copy()
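LiteLLM's Langfuse callbacks are now gated twice: the new `LLM_LANGFUSE_ENABLE` variable must be truthy, and `setup_langfuse()` must still return an enabled client (i.e. the LANGFUSE_* keys are set). A rough sketch of the resulting decision, with the Langfuse setup reduced to a boolean parameter (an assumption for illustration):

import os

def to_bool(value) -> bool:
    return False if value is None else str(value).lower() == "true"

def langfuse_callbacks_enabled(langfuse_client_enabled: bool) -> bool:
    """langfuse_client_enabled stands in for setup_langfuse().enabled."""
    if not to_bool(os.getenv("LLM_LANGFUSE_ENABLE", False)):
        return False                    # flag off: setup_langfuse() is never called
    return langfuse_client_enabled      # flag on: still requires LANGFUSE_* keys

os.environ["LLM_LANGFUSE_ENABLE"] = "true"
print(langfuse_callbacks_enabled(True))   # True  -> litellm success/failure callbacks registered
print(langfuse_callbacks_enabled(False))  # False -> "langfuse.enabled=false" warning path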
@@ -108,27 +117,27 @@ class LLMClient:
  # as it causes errors with inference profiles
  if model_name.startswith("bedrock/") and "claude-3-7" in model_name:
  prepared_llm_params.pop("max_tokens")
- logging.debug(f"prepare_llm_params: Remove 'max_tokens' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Remove 'max_tokens' param for model: {model_name}")
  else:
  is_openai_o_series = 'o1' in model_name
  is_openai_gpt5 = 'gpt-5' in model_name
  param_name = "max_completion_tokens" if (is_openai_o_series or is_openai_gpt5) else "max_tokens"
  if param_name == "max_completion_tokens":
  prepared_llm_params[param_name] = max_tokens
- logging.debug(f"prepare_llm_params: Add 'max_completion_tokens' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'max_completion_tokens' param for model: {model_name}")

  # # Add Claude-specific headers
  if "claude" in model_name.lower() or "anthropic" in model_name.lower():
  prepared_llm_params["extra_headers"] = {
  "anthropic-beta": "output-128k-2025-02-19"
  }
- logging.debug(f"prepare_llm_params: Add 'extra_headers' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'extra_headers' param for model: {model_name}")

  # Add Bedrock-specific parameters
  if model_name.startswith("bedrock/"):
  if not model_id and "anthropic.claude-3-7-sonnet" in model_name:
  prepared_llm_params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
- logging.debug(f"prepare_llm_params: Must Set 'model_id' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Must Set 'model_id' param for model: {model_name}")

  # Apply Anthropic prompt caching (minimal implementation)
  effective_model_name = llm_config_params.get("model", model_name)
@@ -136,14 +145,14 @@ class LLMClient:
  # OpenAI GPT-5: drop unsupported temperature param (only default 1 allowed)
  if "gpt-5" in effective_model_name and "temperature" in llm_config_params and llm_config_params["temperature"] != 1:
  prepared_llm_params.pop("temperature", None)
- logging.debug(f"prepare_llm_params: Remove 'temperature' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Remove 'temperature' param for model: {model_name}")

  # OpenAI GPT-5: request priority service tier when calling OpenAI directly
  # Pass via both top-level and extra_body for LiteLLM compatibility
  if "gpt-5" in effective_model_name and not effective_model_name.startswith("openrouter/"):
  prepared_llm_params["service_tier"] = "priority"
  prepared_llm_params["extra_body"] = {"service_tier": "priority"}
- logging.debug(f"prepare_llm_params: Add 'service_tier' and 'extra_body' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'service_tier' and 'extra_body' param for model: {model_name}")

  # Add reasoning_effort for Anthropic models if enabled
  enable_thinking = llm_config_params.get("enable_thinking")
@@ -156,14 +165,14 @@ class LLMClient:
  prepared_llm_params["provider"] = {
  "order": ["together/fp8", "novita/fp8", "baseten/fp8", "moonshotai", "groq"]
  }
- logging.debug(f"prepare_llm_params: Add 'provider' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'provider' param for model: {model_name}")

  reasoning_effort = llm_config_params.get("reasoning_effort")
  if is_anthropic and use_thinking:
  effort_level = reasoning_effort if reasoning_effort else 'low'
  prepared_llm_params["reasoning_effort"] = effort_level
  prepared_llm_params["temperature"] = 1.0 # Required by Anthropic when reasoning_effort is used
- logging.debug(f"prepare_llm_params: Set 'temperature'=1.0 param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Set 'temperature'=1.0 param for model: {model_name}")

  return prepared_llm_params

@@ -197,7 +206,7 @@ class LLMClient:
  {"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}
  ]
  cache_control_count += 1
- logging.debug(f"prepare_complete_params: Add 'cache_control' in message content, for model: {model_name}")
+ logging.debug(f"LLMClient prepare_complete_params: Add 'cache_control' in message content, for model: {model_name}")
  elif isinstance(content, list):
  for item in content:
  if cache_control_count >= max_cache_control_blocks:
@@ -205,7 +214,7 @@ class LLMClient:
  if isinstance(item, dict) and item.get("type") == "text" and "cache_control" not in item:
  item["cache_control"] = {"type": "ephemeral"}
  cache_control_count += 1
- logging.debug(f"prepare_complete_params: Add 'cache_control' in message content list, for model: {model_name}")
+ logging.debug(f"LLMClient prepare_complete_params: Add 'cache_control' in message content list, for model: {model_name}")

  return complete_params

@@ -225,18 +234,18 @@ class LLMClient:
  last_error = None
  for attempt in range(self.max_retries):
  try:
- logging.info(f"*** create_completion ***: LLM '{self.model_name}' completion attempt {attempt + 1}/{self.max_retries}")
+ logging.info(f"*** LLMClient create_completion: LLM '{self.model_name}' completion attempt {attempt + 1}/{self.max_retries}")
  response = await litellm.acompletion(**complete_params)
  return response
  except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
  last_error = e
  await self._handle_llm_error(e, attempt)
  except Exception as e:
- logging.error(f"create_completion: Unexpected error during LLM completion: {str(e)}", exc_info=True)
- raise LLMError(f"LLM completion failed: {e}")
+ logging.error(f"LLMClient create_completion: Unexpected error during LLM completion: {str(e)}", exc_info=True)
+ raise LLMError(f"LLMClient create completion failed: {e}")

- logging.error(f"create_completion: LLM completion failed after {self.max_retries} attempts: {last_error}", exc_info=True)
- raise LLMError(f"LLM completion failed after {self.max_retries} attempts !")
+ logging.error(f"LLMClient create_completion: LLM completion failed after {self.max_retries} attempts: {last_error}", exc_info=True)
+ raise LLMError(f"LLMClient create completion failed after {self.max_retries} attempts !")

  if __name__ == "__main__":
  from xgae.utils.setup_env import setup_logging
@@ -244,14 +253,16 @@ if __name__ == "__main__":
  setup_logging()
  langfuse = setup_langfuse()

- async def llm_completion():
+ async def main():
  llm_client = LLMClient(LLMConfig(stream=False))

  messages = [{"role": "user", "content": "1+1="}]
  trace_id = langfuse.trace(name = "xgae_litellm_test").trace_id
+ await asyncio.sleep(1)
+
  meta = LangfuseMetadata(
  generation_name="llm_completion_test",
- generation_id="generation_id",
+ generation_id="generation_id_0",
  existing_trace_id=trace_id,
  session_id="session_0",
  )
@@ -269,7 +280,6 @@ if __name__ == "__main__":
  else:
  print(response.choices[0].message.content)

-
- asyncio.run(llm_completion())
+ asyncio.run(main())


xgae/utils/setup_env.py CHANGED
@@ -52,7 +52,7 @@ def setup_logging(log_file: str=None, log_level: str="INFO") :

  logger.setLevel(logging_level)

- logging.info(f"📡 XGAE_LOGGING is initialized, log_level={log_level}, log_file={log_file}")
+ logging.info(f"🛠️ XGA_LOGGING is initialized, log_level={log_level}, log_file={log_file}")

  def setup_env_logging():
  log_enable = to_bool(os.getenv("LOG_ENABLE", True))
@@ -60,6 +60,7 @@ def setup_env_logging():
  log_file = os.getenv("LOG_FILE", "log/xga.log")
  if log_enable :
  setup_logging(log_file, log_level)
+ setup_logging(log_file, log_level)

  def setup_langfuse() -> Langfuse:
  env_public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
@@ -71,10 +72,10 @@ def setup_langfuse() -> Langfuse:
  secret_key=env_secret_key,
  host=env_host)

- logging.info("📡 XGAE_LANGFUSE initialized Successfully by Key !")
+ logging.info("🛠️ XGA_LANGFUSE initialized Successfully by Key !")
  else:
  _langfuse = Langfuse(enabled=False)
- logging.warning("📡 XGAE_LANGFUSE Not set key, Langfuse is disabled!")
+ logging.warning("🛠️ XGA_LANGFUSE Not set key, Langfuse is disabled!")

  return _langfuse