xgae 0.1.9__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xgae might be problematic.
- xgae/__init__.py +4 -0
- xgae/cli_app.py +85 -0
- xgae/engine/engine_base.py +3 -3
- xgae/engine/mcp_tool_box.py +4 -4
- xgae/engine/responser/non_stream_responser.py +33 -38
- xgae/engine/responser/responser_base.py +42 -40
- xgae/engine/responser/stream_responser.py +95 -782
- xgae/engine/task_engine.py +98 -50
- xgae/engine/task_langfuse.py +8 -6
- xgae/tools/without_general_tools_app.py +2 -3
- xgae/utils/__init__.py +2 -2
- xgae/utils/json_helpers.py +2 -2
- xgae/utils/llm_client.py +42 -32
- xgae/utils/setup_env.py +4 -3
- {xgae-0.1.9.dist-info → xgae-0.1.12.dist-info}/METADATA +1 -1
- xgae-0.1.12.dist-info/RECORD +21 -0
- {xgae-0.1.9.dist-info → xgae-0.1.12.dist-info}/entry_points.txt +1 -0
- xgae-0.1.9.dist-info/RECORD +0 -20
- {xgae-0.1.9.dist-info → xgae-0.1.12.dist-info}/WHEEL +0 -0
xgae/engine/task_engine.py
CHANGED
@@ -5,7 +5,7 @@ import os
  from typing import List, Any, Dict, Optional, AsyncGenerator, Union, Literal
  from uuid import uuid4

- from xgae.utils import handle_error
+ from xgae.utils import handle_error, to_bool
  from xgae.utils.llm_client import LLMClient, LLMConfig
  from xgae.utils.json_helpers import format_for_yield

@@ -46,6 +46,8 @@ class XGATaskEngine:

  max_auto_run = max_auto_run if max_auto_run else int(os.getenv("MAX_AUTO_RUN", 15))
  self.max_auto_run: int = 1 if max_auto_run <= 1 else max_auto_run
+
+ self.use_assistant_chunk_msg = to_bool(os.getenv("USE_ASSISTANT_CHUNK_MSG", False))
  self.tool_exec_parallel = True if tool_exec_parallel is None else tool_exec_parallel

  self.task_no = -1
@@ -67,7 +69,7 @@ class XGATaskEngine:
  chunks.append(chunk)

  if len(chunks) > 0:
- final_result = self.
+ final_result = self.parse_final_result(chunks)
  else:
  final_result = XGATaskResult(type="error", content="LLM Answer is Empty")

@@ -117,22 +119,22 @@ class XGATaskEngine:

  self.task_prompt = self.prompt_builder.build_task_prompt(self.model_name, general_tool_schemas, custom_tool_schemas)

- logging.info("*" *
+ logging.info("*" * 10 + f" XGATaskEngine Task'{self.task_id}' Initialized " + "*" * 10)
  logging.info(f"model_name={self.model_name}, is_stream={self.is_stream}")
  logging.info(f"general_tools={general_tools}, custom_tools={custom_tools}")


  async def _run_task_auto(self) -> AsyncGenerator[Dict[str, Any], None]:
- def update_continuous_state(_auto_continue_count, _auto_continue):
- continuous_state["auto_continue_count"] = _auto_continue_count
- continuous_state["auto_continue"] = _auto_continue
-
  continuous_state: TaskRunContinuousState = {
  "accumulated_content": "",
  "auto_continue_count": 0,
  "auto_continue": False if self.max_auto_run <= 1 else True
  }

+ def update_continuous_state(_auto_continue_count, _auto_continue):
+ continuous_state["auto_continue_count"] = _auto_continue_count
+ continuous_state["auto_continue"] = _auto_continue
+
  auto_continue_count = 0
  auto_continue = True
  while auto_continue and auto_continue_count < self.max_auto_run:
@@ -146,34 +148,45 @@ class XGATaskEngine:
  content = json.loads(chunk.get('content', '{}'))
  status_type = content.get('status_type', None)
  if status_type == "error":
- logging.error(f"run_task_auto: task_response error: {chunk.get('message', 'Unknown error')}")
+ logging.error(f"TaskEngine run_task_auto: task_response error: {chunk.get('message', 'Unknown error')}")
  auto_continue = False
  break
  elif status_type == 'finish':
  finish_reason = content.get('finish_reason', None)
  if finish_reason == 'completed':
- logging.info(f"run_task_auto: Detected finish_reason='completed', TASK_COMPLETE Success !")
+ logging.info(f"TaskEngine run_task_auto: Detected finish_reason='completed', TASK_COMPLETE Success !")
  auto_continue = False
  break
  elif finish_reason == 'xml_tool_limit_reached':
- logging.warning(f"run_task_auto: Detected finish_reason='xml_tool_limit_reached', stop auto-continue")
+ logging.warning(f"TaskEngine run_task_auto: Detected finish_reason='xml_tool_limit_reached', stop auto-continue")
+ auto_continue = False
+ break
+ elif finish_reason == 'non_tool_call':
+ logging.warning(f"TaskEngine run_task_auto: Detected finish_reason='non_tool_call', stop auto-continue")
  auto_continue = False
  break
  elif finish_reason == 'stop' or finish_reason == 'length': # 'length' never occur
  auto_continue = True
  auto_continue_count += 1
  update_continuous_state(auto_continue_count, auto_continue)
- logging.info(f"run_task_auto: Detected finish_reason='{finish_reason}', auto-continuing ({auto_continue_count}/{self.max_auto_run})")
+ logging.info(f"TaskEngine run_task_auto: Detected finish_reason='{finish_reason}', auto-continuing ({auto_continue_count}/{self.max_auto_run})")
  except Exception as parse_error:
- logging.error(f"run_task_auto:
- content = {"role": "system", "status_type": "error", "message": "Parse response chunk Error"}
+ logging.error(f"TaskEngine run_task_auto: Parse chunk error, chunk: {chunk}")
  handle_error(parse_error)
+ self.task_langfuse.root_span.event(name="engine_parse_chunk_error", level="ERROR",
+ status_message=(f"Task Engine parse chunk error: {parse_error}"),
+ metadata={"content": chunk})
+
+ content = {"role": "system", "status_type": "error", "message": "Parse response chunk Error"}
  error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
  yield format_for_yield(error_msg)
  except Exception as run_error:
- logging.error(f"run_task_auto: Call task_run_once error: {
- content = {"role": "system", "status_type": "error", "message": "Call task_run_once error"}
+ logging.error(f"TaskEngine run_task_auto: Call task_run_once error: {run_error}")
  handle_error(run_error)
+ self.task_langfuse.root_span.event(name="engine_task_run_once_error", level="ERROR",
+ status_message=(f"Call task_run_once error: {run_error}"))
+
+ content = {"role": "system", "status_type": "error", "message": "Call run_task_once error"}
  error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
  yield format_for_yield(error_msg)

@@ -191,21 +204,25 @@ class XGATaskEngine:
  }
  llm_messages.append(temp_assistant_message)

-
- langfuse_metadata = self.task_langfuse.create_llm_langfuse_meta(
+ auto_count = continuous_state.get("auto_continue_count")
+ langfuse_metadata = self.task_langfuse.create_llm_langfuse_meta(auto_count)

+ self.task_langfuse.root_span.event(name="engine_start_create_completion", level="DEFAULT",
+ status_message=(f"Task Engine start create_completion llm_messages len={len(llm_messages)}"))
  llm_response = await self.llm_client.create_completion(llm_messages, langfuse_metadata)
  response_processor = self._create_response_processer()

  async for chunk in response_processor.process_response(llm_response, llm_messages, continuous_state):
- self._logging_reponse_chunk(chunk)
+ self._logging_reponse_chunk(chunk, auto_count)
  yield chunk

- def
+ def parse_final_result(self, chunks: List[Dict[str, Any]]) -> XGATaskResult:
  final_result: XGATaskResult = None
+ reverse_chunks = reversed(chunks)
+ chunk = None
  try:
  finish_reason = ''
- for chunk in
+ for chunk in reverse_chunks:
  chunk_type = chunk.get("type")
  if chunk_type == "status":
  status_content = json.loads(chunk.get('content', '{}'))
@@ -215,10 +232,7 @@ class XGATaskEngine:
  final_result = XGATaskResult(type="error", content=error)
  elif status_type == "finish":
  finish_reason = status_content.get('finish_reason', None)
-
- error = "Completed due to over task max_auto_run limit !"
- final_result = XGATaskResult(type="error", content=error)
- elif chunk_type == "tool" and finish_reason in ['completed', 'stop']:
+ elif chunk_type == "tool" and finish_reason in ['completed', 'stop', 'xml_tool_limit_reached']:
  tool_content = json.loads(chunk.get('content', '{}'))
  tool_execution = tool_content.get('tool_execution')
  tool_name = tool_execution.get('function_name')
@@ -238,22 +252,33 @@ class XGATaskEngine:
  result_type = "answer" if success else "error"
  result_content = f"Task execute '{tool_name}' {result_type}: {output}"
  final_result = XGATaskResult(type=result_type, content=result_content)
- elif chunk_type == "
+ elif chunk_type == "assistant" and finish_reason == 'non_tool_call':
  assis_content = chunk.get('content', {})
  result_content = assis_content.get("content", "LLM output is empty")
  final_result = XGATaskResult(type="answer", content=result_content)

- if final_result
+ if final_result:
  break
+
+ if final_result and finish_reason == "completed":
+ logging.info(f"✅ FINAL_RESULT: finish_reason={finish_reason}, final_result={final_result}")
+ elif final_result is not None:
+ logging.warning(f"⚠️ FINAL_RESULT: finish_reason={finish_reason}, final_result={final_result}")
+ else:
+ logging.warning(f"❌ FINAL_RESULT: LLM Result is EMPTY, finish_reason={finish_reason}")
+ final_result = XGATaskResult(type="error", content="LLM has no answer")
  except Exception as e:
- logging.error(f"parse_final_result:
- final_result = XGATaskResult(type="error", content="Parse final result failed!")
+ logging.error(f"TaskEngine parse_final_result: Parse message chunk error, chunk: {chunk}")
  handle_error(e)
+ self.task_langfuse.root_span.event(name="engine_parse_final_result_error", level="ERROR",
+ status_message=(f"Task Engine parse final result error: {e}"),
+ metadata={"content": chunk})

-
+ final_result = XGATaskResult(type="error", content="Parse final result failed!")

+ return final_result

- def
+ def create_response_message(self, type: XGAResponseMsgType,
  content: Union[Dict[str, Any], List[Any], str],
  is_llm_message: bool,
  metadata: Optional[Dict[str, Any]]=None)-> XGAResponseMessage:
@@ -271,10 +296,17 @@ class XGATaskEngine:
  content = content,
  metadata = metadata
  )
- self.task_response_msgs.append(message)

  return message

+ def add_response_message(self, type: XGAResponseMsgType,
+ content: Union[Dict[str, Any], List[Any], str],
+ is_llm_message: bool,
+ metadata: Optional[Dict[str, Any]]=None)-> XGAResponseMessage:
+ message = self.create_response_message(type, content, is_llm_message, metadata)
+ self.task_response_msgs.append(message)
+ return message
+
  def get_history_llm_messages (self) -> List[Dict[str, Any]]:
  llm_messages = []
  for message in self.task_response_msgs:
@@ -284,13 +316,12 @@ class XGATaskEngine:
  response_llm_contents = []
  for llm_message in llm_messages:
  content = llm_message["content"]
- # @todo content List type
  if isinstance(content, str):
  try:
  _content = json.loads(content)
  response_llm_contents.append(_content)
  except json.JSONDecodeError as e:
- logging.error(f"
+ logging.error(f"TaskEngine get_history_llm_messages: Failed to decode json, content: {content}")
  handle_error(e)
  else:
  response_llm_contents.append(content)
@@ -315,9 +346,11 @@ class XGATaskEngine:
  "task_no": self.task_no,
  "model_name": self.model_name,
  "max_xml_tool_calls": 0,
+ "use_assistant_chunk_msg": self.use_assistant_chunk_msg,
  "tool_execution_strategy": "parallel" if self.tool_exec_parallel else "sequential", # ,
  "xml_adding_strategy": "user_message",
  "add_response_msg_func": self.add_response_message,
+ "create_response_msg_func": self.create_response_message,
  "tool_box": self.tool_box,
  "task_langfuse": self.task_langfuse,
  }
@@ -328,37 +361,52 @@ class XGATaskEngine:
  return XGATaskLangFuse(self.session_id, self.task_id, self.task_run_id, self.task_no, self.agent_id)


- def _logging_reponse_chunk(self, chunk):
-
-
-
-
-
-
-
-
-
-
-
-
+ def _logging_reponse_chunk(self, chunk, auto_count: int)-> None:
+ try:
+ chunk_type = chunk.get('type', 'unknown')
+ prefix = ""
+ if chunk_type == 'status':
+ content = json.loads(chunk.get('content', '{}'))
+ status_type = content.get('status_type', "empty")
+ if status_type in ["tool_started", "tool_completed"]:
+ return
+ prefix = "-" + status_type
+ elif chunk_type == 'tool':
+ tool_content = json.loads(chunk.get('content', '{}'))
+ tool_execution = tool_content.get('tool_execution')
+ tool_name = tool_execution.get('function_name')
+ prefix = "-" + tool_name
+
+ content = chunk.get('content', '')
+ pretty_content = content
+ if isinstance(content, dict):
+ pretty_content = json.dumps(content, ensure_ascii=False, indent=2)
+
+ if chunk_type == "assistant_chunk":
+ logging.debug(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}> content: {pretty_content}")
+ else:
+ logging.info(f"TASK_RESP_CHUNK[{auto_count}]<{chunk_type}{prefix}> content: {pretty_content}")

-
+ except Exception as e:
+ logging.error(f"TaskEngine logging_reponse_chunk: Decorate chunk log error, chunk: {chunk}")
+ handle_error(e)


  if __name__ == "__main__":
  import asyncio
  from xgae.utils.misc import read_file
+ from xgae.utils.setup_env import setup_logging
+
+ setup_logging()

  async def main():
  # Before Run Exec: uv run custom_fault_tools
  tool_box = XGAMcpToolBox(custom_mcp_server_file="mcpservers/custom_servers.json")
- system_prompt = read_file("templates/
+ system_prompt = read_file("templates/example/fault_user_prompt.txt")
  engine = XGATaskEngine(tool_box=tool_box,
  general_tools=[],
  custom_tools=["*"],
- llm_config=LLMConfig(stream=False),
  system_prompt=system_prompt,
- max_auto_run=8,
  session_id="session_1",
  agent_id="agent_1",)
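The new create_response_message / add_response_message split separates building a response message from recording it in task_response_msgs, and the response-processor config now receives both functions, presumably so processors can build chunk messages without persisting them. A minimal sketch of that pattern, with plain dicts standing in for XGAResponseMessage and all names below illustrative only:

    from typing import Any, Dict, List, Optional
    from uuid import uuid4

    class MiniEngine:
        def __init__(self) -> None:
            self.task_response_msgs: List[Dict[str, Any]] = []

        def create_response_message(self, type: str, content: Any, is_llm_message: bool,
                                    metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
            # Build the message only; the caller decides whether to record it.
            return {"message_id": str(uuid4()), "type": type, "content": content,
                    "is_llm_message": is_llm_message, "metadata": metadata or {}}

        def add_response_message(self, type: str, content: Any, is_llm_message: bool,
                                 metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
            # Build and record, mirroring the new add_response_message wrapper.
            message = self.create_response_message(type, content, is_llm_message, metadata)
            self.task_response_msgs.append(message)
            return message

    engine = MiniEngine()
    engine.create_response_message("assistant_chunk", "partial text", is_llm_message=True)  # not stored
    engine.add_response_message("status", {"status_type": "finish"}, is_llm_message=False)  # stored
    print(len(engine.task_response_msgs))  # 1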
xgae/engine/task_langfuse.py
CHANGED
@@ -2,20 +2,22 @@
  from typing import Any, Dict, Optional
  from langfuse import Langfuse

- from xgae.utils.setup_env import setup_langfuse
+ from xgae.utils.setup_env import setup_langfuse
  from xgae.utils.llm_client import LangfuseMetadata
  from xgae.engine.engine_base import XGATaskResult

- setup_env_logging()
- langfuse:Langfuse = setup_langfuse()
-
  class XGATaskLangFuse:
+ langfuse: Langfuse = None
+
  def __init__(self,
  session_id: str,
  task_id:str,
  task_run_id: str,
  task_no: int,
  agent_id: str) -> None:
+ if XGATaskLangFuse.langfuse is None:
+ XGATaskLangFuse.langfuse = setup_langfuse()
+
  self.session_id = session_id
  self.task_id = task_id
  self.task_run_id = task_run_id
@@ -35,9 +37,9 @@ class XGATaskLangFuse:
  trace = None
  if trace_id:
  self.trace_id = trace_id
- trace = langfuse.trace(id=trace_id)
+ trace = XGATaskLangFuse.langfuse.trace(id=trace_id)
  else:
- trace = langfuse.trace(name="xga_task_engine")
+ trace = XGATaskLangFuse.langfuse.trace(name="xga_task_engine")
  self.trace_id = trace.id

  metadata = {"task_id": self.task_id, "session_id": self.session_id, "agent_id": self.agent_id}
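The module-level Langfuse client created at import time is replaced by a class attribute that is initialized lazily, so setup_langfuse() only runs when the first XGATaskLangFuse is constructed. A minimal sketch of the same lazy class-level pattern, with a hypothetical make_client() standing in for setup_langfuse():

    from typing import Optional

    def make_client() -> object:
        print("client created once")
        return object()

    class TaskTracer:
        client: Optional[object] = None  # shared by all instances

        def __init__(self, session_id: str) -> None:
            if TaskTracer.client is None:        # first construction pays the setup cost
                TaskTracer.client = make_client()
            self.session_id = session_id

    TaskTracer("session_1")
    TaskTracer("session_2")  # reuses the client created above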
xgae/tools/without_general_tools_app.py
CHANGED
@@ -3,7 +3,6 @@ from pydantic import Field

  from mcp.server.fastmcp import FastMCP

- from xgae.engine.engine_base import XGAToolResult

  mcp = FastMCP(name="XGAE Message Tools")

@@ -17,7 +16,7 @@ async def complete(task_id: str,
  description="Comma-separated list of final outputs. Use when: 1) Completion relates to files 2) User needs to review outputs 3) Deliverables in files")]
  ):
  print(f"<XGAETools-complete>: task_id={task_id}, text={text}, attachments={attachments}")
- return
+ return {"status": "complete"}


  @mcp.tool(
@@ -30,7 +29,7 @@ async def ask(task_id: str,
  description="Comma-separated list of files/URLs to attach. Use when: 1) Question relates to files/configs 2) User needs to review content 3) Options documented in files 4) Supporting evidence needed")]
  ):
  print(f"<XGAETools-ask>: task_id={task_id}, text={text}, attachments={attachments}")
- return
+ return {"status": "Awaiting user response..."}

  @mcp.tool(
  description="end task, destroy sandbox"
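The complete and ask tools now return small status dicts instead of None, which gives callers something concrete to branch on. A hypothetical caller-side check (the real result shape beyond the "status" key is not shown in this diff):

    def handle_tool_result(result: dict) -> str:
        # Branch on the status string the tools now return.
        status = result.get("status", "")
        return "task finished" if status == "complete" else f"pending: {status}"

    print(handle_tool_result({"status": "complete"}))                   # task finished
    print(handle_tool_result({"status": "Awaiting user response..."}))  # pending: Awaiting user response...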
xgae/utils/__init__.py
CHANGED
@@ -8,8 +8,8 @@ def handle_error(e: Exception) -> None:
  raise (e) from e


- def to_bool(value:
+ def to_bool(value: any) -> bool:
  if value is None:
  return False

- return
+ return str(value).lower() == "true"
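to_bool returns False for None and otherwise compares the lowercased string form against "true", so only "true"/"True"/"TRUE" (or the bool True) count as truthy; strings like "1" or "yes" do not. A quick check of that behavior with an environment variable:

    import os

    def to_bool(value) -> bool:  # same logic as the helper added above
        if value is None:
            return False
        return str(value).lower() == "true"

    os.environ["USE_ASSISTANT_CHUNK_MSG"] = "True"
    print(to_bool(os.getenv("USE_ASSISTANT_CHUNK_MSG", False)))  # True
    print(to_bool("false"), to_bool("1"), to_bool(None))         # False False False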
xgae/utils/json_helpers.py
CHANGED
@@ -159,10 +159,10 @@ def format_for_yield(message_object: Dict[str, Any]) -> Dict[str, Any]:

  # Ensure content is a JSON string
  if 'content' in formatted and not isinstance(formatted['content'], str):
- formatted['content'] = json.dumps(formatted['content'])
+ formatted['content'] = json.dumps(formatted['content'], ensure_ascii=False, indent=2)

  # Ensure metadata is a JSON string
  if 'metadata' in formatted and not isinstance(formatted['metadata'], str):
- formatted['metadata'] = json.dumps(formatted['metadata'])
+ formatted['metadata'] = json.dumps(formatted['metadata'], ensure_ascii=False, indent=2)

  return formatted
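Adding ensure_ascii=False keeps non-ASCII characters readable instead of escaping them as \uXXXX sequences, and indent=2 pretty-prints the serialized content. A small standard-library illustration:

    import json

    payload = {"status": "完成", "count": 2}
    print(json.dumps(payload))                                # {"status": "\u5b8c\u6210", "count": 2}
    print(json.dumps(payload, ensure_ascii=False, indent=2))  # keeps 完成 and spreads keys over lines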
xgae/utils/llm_client.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Union, Dict, Any, Optional, List, TypedDict
  from openai import OpenAIError
  from litellm.utils import ModelResponse, CustomStreamWrapper

+ from xgae.utils import to_bool
  from xgae.utils.setup_env import setup_langfuse

  class LLMConfig(TypedDict, total=False):
@@ -18,8 +19,8 @@ class LLMConfig(TypedDict, total=False):
  api_base: str # Optional API base URL, Override .env LLM_API_BASE
  temperature: float # temperature: Optional Sampling temperature (0-1), Override .env LLM_TEMPERATURE
  max_tokens: int # max_tokens: Optional Maximum tokens in the response, Override .env LLM_MAX_TOKENS
- stream: bool # stream: Optional whether to stream the response,
- enable_thinking: bool # Optional whether to enable thinking,
+ stream: bool # stream: Optional whether to stream the response, Override .env LLM_STREAM
+ enable_thinking: bool # Optional whether to enable thinking, Override .env LLM_ENABLE_THINKING
  reasoning_effort: str # Optional level of reasoning effort, default is ‘low’
  response_format: str # response_format: Optional desired format for the response, default is None
  top_p: int # Optional Top-p sampling parameter, default is None
@@ -46,6 +47,7 @@ class LLMClient:
  def __init__(self, llm_config: LLMConfig=None):
  litellm.modify_params = True
  litellm.drop_params = True
+
  self._init_langfuse()

  llm_config = llm_config or LLMConfig()
@@ -56,6 +58,8 @@ class LLMClient:
  env_llm_api_base = os.getenv("LLM_API_BASE", "https://dashscope.aliyuncs.com/compatible-mode/v1")
  env_llm_max_tokens = int(os.getenv("LLM_MAX_TOKENS", 16384))
  env_llm_temperature = float(os.getenv("LLM_TEMPERATURE", 0.7))
+ env_llm_stream = to_bool(os.getenv("LLM_STREAM", False))
+ env_llm_enable_thinking = to_bool(os.getenv("LLM_ENABLE_THINKING", False))

  llm_config_params = {
  "model": llm_config.get("model", env_llm_model),
@@ -65,8 +69,8 @@ class LLMClient:
  "api_base": llm_config.get("api_base", env_llm_api_base),
  "temperature": llm_config.get("temperature", env_llm_temperature),
  "max_tokens": llm_config.get("max_tokens", env_llm_max_tokens),
- "stream": llm_config.get("stream",
- "enable_thinking": llm_config.get("enable_thinking",
+ "stream": llm_config.get("stream", env_llm_stream),
+ "enable_thinking": llm_config.get("enable_thinking", env_llm_enable_thinking),
  "reasoning_effort": llm_config.get("reasoning_effort", 'low'),
  "response_format": llm_config.get("response_format", None),
  "top_p": llm_config.get("top_p", None),
@@ -78,22 +82,27 @@ class LLMClient:
  self.is_stream = llm_config_params['stream']

  self.lite_llm_params = self._prepare_llm_params(llm_config_params)
- logging.info(f"
+ logging.info(f"=== LLMClient initialed : model={self.model_name}, is_stream={self.is_stream}, enable thinking={self.lite_llm_params['enable_thinking']}")

  @staticmethod
  def _init_langfuse():
  if not LLMClient.langfuse_inited:
  LLMClient.langfuse_inited =True
-
-
-
-
-
-
+
+ env_llm_langfuse_enable = to_bool(os.getenv("LLM_LANGFUSE_ENABLE", False))
+ if env_llm_langfuse_enable:
+ env_langfuse = setup_langfuse()
+ if env_langfuse and env_langfuse.enabled:
+ litellm.success_callback = ["langfuse"]
+ litellm.failure_callback = ["langfuse"]
+ LLMClient.langfuse_enabled = True
+ logging.info("🛠️ LiteLLM Langfuse is enable !")
+ else:
+ LLMClient.langfuse_enabled = False
+ logging.warning("🛠️ LiteLLM Langfuse is disable, langfuse.enabled=false !")
  else:
  LLMClient.langfuse_enabled = False
- logging.warning("
-
+ logging.warning("🛠️ LiteLLM Langfuse is disable, LLM_LANGFUSE_ENABLE=False !")

  def _prepare_llm_params(self, llm_config_params: Dict[str, Any]) -> Dict[str, Any]:
  prepared_llm_params = llm_config_params.copy()
@@ -108,27 +117,27 @@ class LLMClient:
  # as it causes errors with inference profiles
  if model_name.startswith("bedrock/") and "claude-3-7" in model_name:
  prepared_llm_params.pop("max_tokens")
- logging.debug(f"prepare_llm_params: Remove 'max_tokens' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Remove 'max_tokens' param for model: {model_name}")
  else:
  is_openai_o_series = 'o1' in model_name
  is_openai_gpt5 = 'gpt-5' in model_name
  param_name = "max_completion_tokens" if (is_openai_o_series or is_openai_gpt5) else "max_tokens"
  if param_name == "max_completion_tokens":
  prepared_llm_params[param_name] = max_tokens
- logging.debug(f"prepare_llm_params: Add 'max_completion_tokens' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'max_completion_tokens' param for model: {model_name}")

  # # Add Claude-specific headers
  if "claude" in model_name.lower() or "anthropic" in model_name.lower():
  prepared_llm_params["extra_headers"] = {
  "anthropic-beta": "output-128k-2025-02-19"
  }
- logging.debug(f"prepare_llm_params: Add 'extra_headers' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'extra_headers' param for model: {model_name}")

  # Add Bedrock-specific parameters
  if model_name.startswith("bedrock/"):
  if not model_id and "anthropic.claude-3-7-sonnet" in model_name:
  prepared_llm_params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
- logging.debug(f"prepare_llm_params: Must Set 'model_id' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Must Set 'model_id' param for model: {model_name}")

  # Apply Anthropic prompt caching (minimal implementation)
  effective_model_name = llm_config_params.get("model", model_name)
@@ -136,14 +145,14 @@ class LLMClient:
  # OpenAI GPT-5: drop unsupported temperature param (only default 1 allowed)
  if "gpt-5" in effective_model_name and "temperature" in llm_config_params and llm_config_params["temperature"] != 1:
  prepared_llm_params.pop("temperature", None)
- logging.debug(f"prepare_llm_params: Remove 'temperature' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Remove 'temperature' param for model: {model_name}")

  # OpenAI GPT-5: request priority service tier when calling OpenAI directly
  # Pass via both top-level and extra_body for LiteLLM compatibility
  if "gpt-5" in effective_model_name and not effective_model_name.startswith("openrouter/"):
  prepared_llm_params["service_tier"] = "priority"
  prepared_llm_params["extra_body"] = {"service_tier": "priority"}
- logging.debug(f"prepare_llm_params: Add 'service_tier' and 'extra_body' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'service_tier' and 'extra_body' param for model: {model_name}")

  # Add reasoning_effort for Anthropic models if enabled
  enable_thinking = llm_config_params.get("enable_thinking")
@@ -156,14 +165,14 @@ class LLMClient:
  prepared_llm_params["provider"] = {
  "order": ["together/fp8", "novita/fp8", "baseten/fp8", "moonshotai", "groq"]
  }
- logging.debug(f"prepare_llm_params: Add 'provider' param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Add 'provider' param for model: {model_name}")

  reasoning_effort = llm_config_params.get("reasoning_effort")
  if is_anthropic and use_thinking:
  effort_level = reasoning_effort if reasoning_effort else 'low'
  prepared_llm_params["reasoning_effort"] = effort_level
  prepared_llm_params["temperature"] = 1.0 # Required by Anthropic when reasoning_effort is used
- logging.debug(f"prepare_llm_params: Set 'temperature'=1.0 param for model: {model_name}")
+ logging.debug(f"LLMClient prepare_llm_params: Set 'temperature'=1.0 param for model: {model_name}")

  return prepared_llm_params

@@ -197,7 +206,7 @@ class LLMClient:
  {"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}
  ]
  cache_control_count += 1
- logging.debug(f"prepare_complete_params: Add 'cache_control' in message content, for model: {model_name}")
+ logging.debug(f"LLMClient prepare_complete_params: Add 'cache_control' in message content, for model: {model_name}")
  elif isinstance(content, list):
  for item in content:
  if cache_control_count >= max_cache_control_blocks:
@@ -205,7 +214,7 @@ class LLMClient:
  if isinstance(item, dict) and item.get("type") == "text" and "cache_control" not in item:
  item["cache_control"] = {"type": "ephemeral"}
  cache_control_count += 1
- logging.debug(f"prepare_complete_params: Add 'cache_control' in message content list, for model: {model_name}")
+ logging.debug(f"LLMClient prepare_complete_params: Add 'cache_control' in message content list, for model: {model_name}")

  return complete_params

@@ -225,18 +234,18 @@ class LLMClient:
  last_error = None
  for attempt in range(self.max_retries):
  try:
- logging.info(f"*** create_completion
+ logging.info(f"*** LLMClient create_completion: LLM '{self.model_name}' completion attempt {attempt + 1}/{self.max_retries}")
  response = await litellm.acompletion(**complete_params)
  return response
  except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
  last_error = e
  await self._handle_llm_error(e, attempt)
  except Exception as e:
- logging.error(f"create_completion: Unexpected error during LLM completion: {str(e)}", exc_info=True)
- raise LLMError(f"
+ logging.error(f"LLMClient create_completion: Unexpected error during LLM completion: {str(e)}", exc_info=True)
+ raise LLMError(f"LLMClient create completion failed: {e}")

- logging.error(f"create_completion: LLM completion failed after {self.max_retries} attempts: {last_error}", exc_info=True)
- raise LLMError(f"
+ logging.error(f"LLMClient create_completion: LLM completion failed after {self.max_retries} attempts: {last_error}", exc_info=True)
+ raise LLMError(f"LLMClient create completion failed after {self.max_retries} attempts !")

  if __name__ == "__main__":
  from xgae.utils.setup_env import setup_logging
@@ -244,14 +253,16 @@ if __name__ == "__main__":
  setup_logging()
  langfuse = setup_langfuse()

- async def
+ async def main():
  llm_client = LLMClient(LLMConfig(stream=False))

  messages = [{"role": "user", "content": "1+1="}]
  trace_id = langfuse.trace(name = "xgae_litellm_test").trace_id
+ await asyncio.sleep(1)
+
  meta = LangfuseMetadata(
  generation_name="llm_completion_test",
- generation_id="
+ generation_id="generation_id_0",
  existing_trace_id=trace_id,
  session_id="session_0",
  )
@@ -269,7 +280,6 @@ if __name__ == "__main__":
  else:
  print(response.choices[0].message.content)

-
- asyncio.run(llm_completion())
+ asyncio.run(main())

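For stream and enable_thinking the resolution order is: an explicit LLMConfig value first, then the .env default parsed with to_bool. A minimal sketch of that precedence (dict access simplified; the env var name is taken from the diff):

    import os

    def to_bool(value) -> bool:  # same helper as xgae.utils.to_bool
        if value is None:
            return False
        return str(value).lower() == "true"

    def resolve_stream(llm_config: dict) -> bool:
        # Explicit config wins; otherwise fall back to the LLM_STREAM env var.
        env_llm_stream = to_bool(os.getenv("LLM_STREAM", False))
        return llm_config.get("stream", env_llm_stream)

    os.environ["LLM_STREAM"] = "true"
    print(resolve_stream({}))                 # True, taken from the environment
    print(resolve_stream({"stream": False}))  # False, explicit config overrides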
xgae/utils/setup_env.py
CHANGED
@@ -52,7 +52,7 @@ def setup_logging(log_file: str=None, log_level: str="INFO") :

  logger.setLevel(logging_level)

- logging.info(f"
+ logging.info(f"🛠️ XGA_LOGGING is initialized, log_level={log_level}, log_file={log_file}")

  def setup_env_logging():
  log_enable = to_bool(os.getenv("LOG_ENABLE", True))
@@ -60,6 +60,7 @@ def setup_env_logging():
  log_file = os.getenv("LOG_FILE", "log/xga.log")
  if log_enable :
  setup_logging(log_file, log_level)
+ setup_logging(log_file, log_level)

  def setup_langfuse() -> Langfuse:
  env_public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
@@ -71,10 +72,10 @@ def setup_langfuse() -> Langfuse:
  secret_key=env_secret_key,
  host=env_host)

- logging.info("
+ logging.info("🛠️ XGA_LANGFUSE initialized Successfully by Key !")
  else:
  _langfuse = Langfuse(enabled=False)
- logging.warning("
+ logging.warning("🛠️ XGA_LANGFUSE Not set key, Langfuse is disabled!")

  return _langfuse

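setup_env_logging reads LOG_ENABLE with to_bool(os.getenv("LOG_ENABLE", True)); when the variable is unset, os.getenv returns the bool default True, whose string form lowercases to "true", so logging stays enabled unless LOG_ENABLE is explicitly set to something other than "true". A quick check of that interplay:

    import os

    def to_bool(value) -> bool:  # same helper as xgae.utils.to_bool
        if value is None:
            return False
        return str(value).lower() == "true"

    os.environ.pop("LOG_ENABLE", None)
    print(to_bool(os.getenv("LOG_ENABLE", True)))   # True  (default bool True -> "true")
    os.environ["LOG_ENABLE"] = "false"
    print(to_bool(os.getenv("LOG_ENABLE", True)))   # False (explicitly disabled)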