ws-bom-robot-app 0.0.41__tar.gz → 0.0.43__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {ws_bom_robot_app-0.0.41/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.43}/PKG-INFO +1 -1
  2. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/setup.py +1 -1
  3. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/config.py +2 -0
  4. ws_bom_robot_app-0.0.43/ws_bom_robot_app/llm/agent_handler.py +178 -0
  5. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/agent_lcel.py +6 -12
  6. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/main.py +41 -19
  7. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/providers/llm_manager.py +11 -9
  8. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/tools/utils.py +2 -2
  9. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/utils/print.py +8 -8
  10. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43/ws_bom_robot_app.egg-info}/PKG-INFO +1 -1
  11. ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/agent_handler.py +0 -180
  12. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/MANIFEST.in +0 -0
  13. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/README.md +0 -0
  14. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/pyproject.toml +0 -0
  15. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/requirements.txt +0 -0
  16. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/setup.cfg +0 -0
  17. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/__init__.py +0 -0
  18. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/auth.py +0 -0
  19. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/cron_manager.py +0 -0
  20. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/__init__.py +0 -0
  21. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/agent_description.py +0 -0
  22. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/api.py +0 -0
  23. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  24. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  25. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/models/api.py +0 -0
  26. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/models/base.py +0 -0
  27. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/models/kb.py +0 -0
  28. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/providers/__init__.py +0 -0
  29. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/settings.py +0 -0
  30. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  31. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  32. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  33. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  34. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  35. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  36. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/utils/agent.py +0 -0
  37. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/utils/chunker.py +0 -0
  38. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/utils/download.py +0 -0
  39. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  40. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/utils/secrets.py +0 -0
  41. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  42. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  43. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
  44. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/db/base.py +0 -0
  45. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/db/chroma.py +0 -0
  46. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/db/faiss.py +0 -0
  47. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/db/manager.py +0 -0
  48. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/db/qdrant.py +0 -0
  49. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
  50. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  51. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  52. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  53. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  54. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  55. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  56. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  57. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
  58. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
  59. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  60. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  61. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  62. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
  63. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  64. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  65. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  66. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/loader/base.py +0 -0
  67. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/loader/docling.py +0 -0
  68. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
  69. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/main.py +0 -0
  70. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/task_manager.py +0 -0
  71. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/util.py +0 -0
  72. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app.egg-info/SOURCES.txt +0 -0
  73. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  74. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app.egg-info/requires.txt +0 -0
  75. {ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
{ws_bom_robot_app-0.0.41/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.43}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ws_bom_robot_app
- Version: 0.0.41
+ Version: 0.0.43
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
  Home-page: https://github.com/websolutespa/bom
  Author: Websolute Spa
{ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/setup.py
@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
 
  setup(
      name="ws_bom_robot_app",
-     version="0.0.41",
+     version="0.0.43",
      description="A FastAPI application serving ws bom/robot/llm platform ai.",
      long_description=open("README.md", encoding='utf-8').read(),
      long_description_content_type="text/markdown",
{ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/config.py
@@ -21,6 +21,7 @@ class Settings(BaseSettings):
      robot_cms_db_folder: str = 'llmVectorDb'
      robot_cms_kb_folder: str ='llmKbFile'
      ANTHROPIC_API_KEY: str = ''
+     DEEPSEEK_API_KEY: str = ''
      OPENAI_API_KEY: str = '' # used also for saas dall-e api
      OLLAMA_API_URL: str = 'http://localhost:11434'
      GROQ_API_KEY: str = ''
@@ -36,6 +37,7 @@ class Settings(BaseSettings):
      os.environ["OPENAI_API_KEY"] = self.OPENAI_API_KEY
      os.environ["OLLAMA_API_URL"] = self.OLLAMA_API_URL
      os.environ["ANTHROPIC_API_KEY"] = self.ANTHROPIC_API_KEY
+     os.environ["DEEPSEEK_API_KEY"] = self.DEEPSEEK_API_KEY
      os.environ["GROQ_API_KEY"] = self.GROQ_API_KEY
      os.environ["GOOGLE_API_KEY"] = self.GOOGLE_API_KEY
      os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.GOOGLE_APPLICATION_CREDENTIALS
ws_bom_robot_app-0.0.43/ws_bom_robot_app/llm/agent_handler.py (new file)
@@ -0,0 +1,178 @@
+ from asyncio import Queue
+ from langchain_core.agents import AgentFinish
+ from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
+ from langchain.callbacks.base import AsyncCallbackHandler
+ from ws_bom_robot_app.llm.utils.print import print_json, print_string
+ from typing import Any, Dict, List, Optional, Union
+ from uuid import UUID
+ import ws_bom_robot_app.llm.settings as settings
+ from langchain_core.callbacks.base import AsyncCallbackHandler
+ from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
+ from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
+ import json, logging, re
+
+ # Here is a custom handler that will print the tokens to stdout.
+ # Instead of printing to stdout you can send the data elsewhere; e.g., to a streaming API response
+
+
+ def _parse_token(llm:str,token: str) -> str:
+     """Parses the token based on the LLM provider."""
+     if llm == "anthropic" and isinstance(token, list):
+         first = token[0]
+         if 'text' in first:
+             token = first['text']
+         else:
+             #[{'id': 'toolu_01GGLwJcrQ8PvFMUkQPGu8n7', 'input': {}, 'name': 'document_retriever_xxx', 'type': 'tool_use', 'index': 1}]
+             token = ""
+     return token
+
+ class AgentHandler(AsyncCallbackHandler):
+
+     def __init__(self, queue: Queue, llm:str, threadId: str = None) -> None:
+         super().__init__()
+         self._threadId = threadId
+         self.queue = queue
+         self.llm = llm
+         self.__started: bool = False
+         # on new token event
+         self.stream_buffer = "" # accumulates text that hasn't been processed yet
+         self.in_json_block = False
+         self.json_buffer = ""
+         self.json_start_regex = re.compile(r'(`{1,3}\s*json\b)') # detect a potential json start fence.
+         self.json_end_regex = re.compile(r'(`{1,3})') # an end fence (one to three backticks).
+         self.stream_cut_last_output_chunk_size = 16 # safe cut last chunk size to output if no markers are found
+
+     async def on_chat_model_start(self, serialized, messages, *, run_id, parent_run_id = None, tags = None, metadata = None, **kwargs):
+         if not self.__started:
+             self.__started = True
+             firstChunk = {
+                 "type": "info",
+                 "threadId": self._threadId,
+             }
+             await self.queue.put(print_json(firstChunk))
+
+     async def on_llm_new_token(
+         self,
+         token: str,
+         *,
+         chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None,
+         run_id: UUID,
+         parent_run_id: Optional[UUID] = None,
+         tags: Optional[List[str]] = None,
+         **kwargs: Any,
+     ) -> None:
+         if token:
+             token = _parse_token(self.llm,token)
+             if token:
+                 self.stream_buffer += token.replace('\n','') # append new data to pending buffer
+                 if not self.in_json_block:
+                     # search for the start of a json block.
+                     start_match = self.json_start_regex.search(self.stream_buffer)
+                     if start_match:
+                         start_index = start_match.start()
+                         # everything before the start marker is normal content.
+                         if start_index > 0:
+                             _before = self.stream_buffer[:start_index].replace('`','').strip() # remove eventual preceding backticks.
+                             if _before:
+                                 await self.queue.put(print_string(_before))
+                         # remove the start marker from pending.
+                         self.stream_buffer = self.stream_buffer[start_match.end():]
+                         # switch into json mode.
+                         self.in_json_block = True
+                         self.json_buffer = ""
+                     else:
+                         # no json start marker found. It might be because the marker is split between chunks.
+                         # to avoid losing potential marker fragments, output what we can safely process:
+                         # if the pending text is long, we output most of it except the last few characters.
+                         if len(self.stream_buffer) > self.stream_cut_last_output_chunk_size:
+                             safe_cut = self.stream_buffer[:-3]
+                             await self.queue.put(print_string(safe_cut))
+                             self.stream_buffer = self.stream_buffer[-3:]
+                 else:
+                     # in json block: look for an end fence.
+                     end_match = self.json_end_regex.search(self.stream_buffer,endpos=3)
+                     if end_match:
+                         end_index = end_match.start()
+                         self.json_buffer += self.stream_buffer[:end_index]
+                         try:
+                             data = json.loads(self.json_buffer.replace('`',''))
+                             await self.queue.put(print_json(data))
+                         except json.JSONDecodeError as e:
+                             logging.error(f"on_token: invalid json: {e} | {self.json_buffer}")
+                         finally:
+                             self.json_buffer = ""
+                         # remove the end fence from pending.
+                         self.stream_buffer = self.stream_buffer[end_match.end():].replace('`','').strip()
+                         self.in_json_block = False
+                     else:
+                         # no end marker found
+                         # accumulate everything and break to wait for more data.
+                         self.json_buffer += self.stream_buffer
+                         self.stream_buffer = ""
+
+     async def on_agent_finish(
+         self,
+         finish: AgentFinish,
+         *,
+         run_id: UUID,
+         parent_run_id: UUID = None,
+         tags: List[str] = None,
+         **kwargs: Any,
+     ) -> None:
+         settings.chat_history.extend(
+             [
+                 AIMessage(content=_parse_token(self.llm,finish.return_values["output"])),
+             ]
+         )
+         # end-of-stream: flush any remaining text
+         if self.in_json_block:
+             try:
+                 data = json.loads(self.json_buffer)
+                 await self.queue.put(print_json(data))
+             except json.JSONDecodeError as e:
+                 logging.error(f"on_agent_finish: invalid json: {e} | {self.json_buffer}")
+             #await self.queue.put(print_string(self.json_buffer))
+         elif self.stream_buffer:
+             await self.queue.put(print_string(self.stream_buffer))
+
+         finalChunk = {"type": "end"}
+         await self.queue.put(print_json(finalChunk))
+         await self.queue.put(None)
+
+
+ class RawAgentHandler(AsyncCallbackHandler):
+
+     def __init__(self,queue: Queue, llm: str) -> None:
+         super().__init__()
+         self.queue = queue
+         self.llm = llm
+
+     async def on_llm_new_token(
+         self,
+         token: str,
+         *,
+         chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None,
+         run_id: UUID,
+         parent_run_id: Optional[UUID] = None,
+         tags: Optional[List[str]] = None,
+         **kwargs: Any,
+     ) -> None:
+         """Handles new tokens during streaming."""
+         if token: # only process non-empty tokens
+             await self.queue.put(_parse_token(self.llm,token))
+
+     async def on_agent_finish(
+         self,
+         finish: AgentFinish,
+         *,
+         run_id: UUID,
+         parent_run_id: UUID = None,
+         tags: List[str] = None,
+         **kwargs: Any,
+     ) -> None:
+         settings.chat_history.extend(
+             [
+                 AIMessage(content=_parse_token(self.llm,finish.return_values["output"]))
+             ]
+         )
+         await self.queue.put(None)
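The rewritten handler replaces the old per-token backtick counter with a buffered scanner: plain text is flushed as string chunks (holding back a few trailing characters in case a fence marker is split across tokens), while everything between an opening json code fence and its closing backticks is accumulated and emitted as one parsed JSON chunk. A condensed, self-contained sketch of that buffering idea, not the shipped class (which additionally wires the queue, callbacks, and chat history):

```python
import json
import re

JSON_START = re.compile(r'`{1,3}\s*json\b')  # opening fence (backticks may arrive split)
JSON_END = re.compile(r'`{1,3}')             # closing fence
FLUSH_THRESHOLD = 16                         # mirrors stream_cut_last_output_chunk_size
HOLD_BACK = 3                                # chars retained in case a fence is split

def split_stream(tokens):
    """Yield ('text', str) and ('json', obj) chunks from an LLM token stream."""
    buffer, json_buf, in_json = "", "", False
    for token in tokens:
        buffer += token
        if not in_json:
            m = JSON_START.search(buffer)
            if m:
                before = buffer[:m.start()].replace('`', '').strip()
                if before:
                    yield ('text', before)
                buffer, json_buf, in_json = buffer[m.end():], "", True
            elif len(buffer) > FLUSH_THRESHOLD:
                # flush what is safe, keep a tail that might hold a partial fence
                yield ('text', buffer[:-HOLD_BACK])
                buffer = buffer[-HOLD_BACK:]
        if in_json:
            m = JSON_END.search(buffer)
            if m:
                json_buf += buffer[:m.start()]
                yield ('json', json.loads(json_buf))
                buffer, in_json = buffer[m.end():].replace('`', '').strip(), False
            else:
                json_buf, buffer = json_buf + buffer, ""
    tail = buffer.replace('`', '').strip()
    if tail:
        yield ('text', tail)

tokens = ['Here is data: ', '``', '`json\n{"a"', ': 1}\n``', '` done']
print(list(split_stream(tokens)))
# -> [('text', 'Here is data:'), ('json', {'a': 1}), ('text', 'done')]
```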
{ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/agent_lcel.py
@@ -20,32 +20,26 @@ class AgentLcel:
          self.__tools = tools
          self.rules = rules
          self.embeddings = llm.get_embeddings()
-         self.memory_key = "chat_history"
+         self.memory_key: str = "chat_history"
          self.__llm_with_tools = llm.get_llm().bind_tools(self.__tools) if len(self.__tools) > 0 else llm.get_llm()
          self.executor = self.__create_agent()
 
      async def __create_prompt(self, input: dict) -> ChatPromptTemplate:
-         message : LlmMessage = input["input"]
+         message : LlmMessage = input[self.memory_key][-1]
          input = message.content
          rules_prompt = await get_rules(self.embeddings, self.rules, input) if self.rules else ""
          system = default_prompt + (tool_prompt(render_text_description(self.__tools)) if len(self.__tools)>0 else "") + self.sys_message + rules_prompt
-         return ChatPromptTemplate.from_messages(
-             [
-                 (
-                     "system", system
-                 ),
+         return ChatPromptTemplate([
+             ("system", system),
              MessagesPlaceholder(variable_name=self.memory_key),
-             ("user", "{input}"),
              MessagesPlaceholder(variable_name="agent_scratchpad"),
-             ]
-         )
+         ])
 
      def __create_agent(self) -> AgentExecutor:
          agent: Any = (
              {
-                 "input": lambda x: x["input"],
                  "agent_scratchpad": lambda x: self.__llm.get_formatter(x["intermediate_steps"]),
-                 "chat_history": lambda x: x["chat_history"],
+                 str(self.memory_key): lambda x: x[self.memory_key],
              }
              | RunnableLambda(self.__create_prompt)
              | self.__llm_with_tools
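With this refactor the executor no longer receives a separate `input` key: the latest user message travels as the last element of `chat_history`, which is why `__create_prompt` reads `input[self.memory_key][-1]` and the `("user", "{input}")` slot disappears from the prompt. A reduced sketch of the new prompt shape, assuming a recent langchain-core (which the diff's positional `ChatPromptTemplate([...])` constructor already requires):

```python
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage

prompt = ChatPromptTemplate([
    ("system", "You are a helpful assistant."),           # stands in for the composed system prompt
    MessagesPlaceholder(variable_name="chat_history"),    # now also carries the latest user turn
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# the last chat_history entry plays the role of the removed ("user", "{input}") slot
value = prompt.invoke({
    "chat_history": [HumanMessage(content="hi")],
    "agent_scratchpad": [],
})
print(value.to_messages())
```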
{ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/main.py
@@ -1,5 +1,5 @@
  from asyncio import Queue
- import asyncio, json, logging, os, traceback
+ import asyncio, json, logging, os, traceback, re
  from fastapi import Request
  from langchain.callbacks.tracers import LangChainTracer
  from langchain_core.callbacks.base import AsyncCallbackHandler
@@ -28,6 +28,18 @@ async def invoke(rq: InvokeRequest) -> str:
      result: AIMessage = await processor.run_agent(_msg)
      return {"result": result.content}
 
+ def _parse_formatted_message(message: str) -> str:
+     try:
+         text_fragments = []
+         quoted_strings = re.findall(r'"([^"\\]*(?:\\.[^"\\]*)*)"', message)
+         for string in quoted_strings:
+             if not string.startswith(('threadId', 'type')) and len(string) > 1:
+                 text_fragments.append(string)
+         result = ''.join(text_fragments)
+         result = result.replace('\\n', '\n')
+     except:
+         result = message
+     return result
  async def __stream(rq: StreamRequest, ctx: Request, queue: Queue,formatted: bool = True) -> None:
      await rq.initialize()
      #os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
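The new `_parse_formatted_message` helper is the last-resort fallback for assistant messages stored in the streamed wire format: when JSON parsing fails, it extracts every double-quoted string, drops values starting with `threadId` or `type`, and joins the rest. A standalone mirror of the helper with an invented payload, showing that the recovery is deliberately lossy (metadata values that do not match the two prefixes survive):

```python
import re

def _parse_formatted_message(message: str) -> str:
    # mirror of the helper added above, for illustration only
    try:
        text_fragments = []
        # every double-quoted string, honoring escaped characters
        quoted_strings = re.findall(r'"([^"\\]*(?:\\.[^"\\]*)*)"', message)
        for string in quoted_strings:
            if not string.startswith(('threadId', 'type')) and len(string) > 1:
                text_fragments.append(string)
        result = ''.join(text_fragments)
        result = result.replace('\\n', '\n')
    except:
        result = message
    return result

# hypothetical stored wire-format message (invented for illustration)
raw = '{"type": "info", "threadId": "t1"},"Hello"," world\\n",{"type": "end"},'
print(repr(_parse_formatted_message(raw)))
# -> 'infot1Hello world\nend' (note that 'info' and 't1' slip through the prefix filter)
```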
@@ -41,21 +53,33 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue,formatted: bool
 
      #CREATION OF CHAT HISTORY FOR AGENT
      for message in rq.messages:
-         if message.role == "user":
-             settings.chat_history.append(HumanMessage(content=message.content))
-         elif message.role == "assistant":
-             message_content = ""
-             if formatted and '{\"type\":\"text\"' in message.content:
-                 try:
-                     json_msg = json.loads('[' + message.content[:-1] + ']')
-                     for msg in json_msg:
-                         if msg.get("content"):
-                             message_content += msg["content"]
-                 except:
-                     message_content = message.content
-             else:
-                 message_content = message.content
-             settings.chat_history.append(AIMessage(content=message_content))
+         if message.role in ["human","user"]:
+             settings.chat_history.append(HumanMessage(content=message.content))
+         elif message.role in ["ai","assistant"]:
+             message_content = ""
+             if formatted:
+                 if '{\"type\":\"string\"' in message.content:
+                     try:
+                         json_msg = json.loads('[' + message.content[:-1] + ']')
+                         for msg in json_msg:
+                             if msg.get("content"):
+                                 message_content += msg["content"]
+                     except:
+                         message_content = _parse_formatted_message(message.content)
+                 elif '{\"type\":\"text\"' in message.content:
+                     try:
+                         json_msg = json.loads('[' + message.content[:-1] + ']')
+                         for msg in json_msg:
+                             if msg.get("text"):
+                                 message_content += msg["text"]
+                     except:
+                         message_content = _parse_formatted_message(message.content)
+                 else:
+                     message_content = _parse_formatted_message(message.content)
+             else:
+                 message_content = message.content
+             if message_content:
+                 settings.chat_history.append(AIMessage(content=message_content))
 
      if rq.lang_chain_tracing:
          client = LangSmithClient(
@@ -79,11 +103,9 @@
          )
          callbacks.append(nebuly_callback)
 
-     #with warnings.catch_warnings():
-     #    warnings.simplefilter("ignore", UserWarning)
      try:
          await processor.executor.ainvoke(
-             {"input": rq.messages[-1], "chat_history": settings.chat_history},
+             {"chat_history": settings.chat_history},
              {"callbacks": callbacks},
          )
      except Exception as e:
{ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/providers/llm_manager.py
@@ -44,7 +44,9 @@ class OpenAI(LlmInterface):
 
      def get_llm(self):
          from langchain_openai import ChatOpenAI
-         chat = ChatOpenAI(api_key=self.config.api_key, model=self.config.model)
+         chat = ChatOpenAI(
+             api_key=self.config.api_key or os.getenv("OPENAI_API_KEY"),
+             model=self.config.model)
          if not any(self.config.model.startswith(prefix) for prefix in ["o1", "o3"]):
              chat.temperature = self.config.temperature
              chat.streaming = True
@@ -60,9 +62,9 @@ class DeepSeek(LlmInterface):
      def get_llm(self):
          from langchain_openai import ChatOpenAI
          return ChatOpenAI(
-             api_key=self.config.api_key,
+             api_key=self.config.api_key or os.getenv("DEEPSEEK_API_KEY"),
              model=self.config.model,
-             base_url="https://api.deepseek.com/v1",
+             base_url="https://api.deepseek.com",
              max_tokens=8192,
              temperature=self.config.temperature,
              streaming=True,
@@ -79,7 +81,7 @@ class Google(LlmInterface):
          from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
          return ChatGoogleGenerativeAI(
              name="chat",
-             api_key=self.config.api_key,
+             api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
              model=self.config.model,
              temperature=self.config.temperature,
              disable_streaming=False
@@ -89,7 +91,7 @@ class Google(LlmInterface):
          from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
          return GoogleGenerativeAIEmbeddings(
              google_api_key=self.config.api_key,
-             model="models/text-embedding-004")
+             model="models/text-embedding-005")
 
      def get_models(self):
          import google.generativeai as genai
@@ -112,7 +114,7 @@ class Gvertex(LlmInterface):
          )
      def get_embeddings(self):
          from langchain_google_vertexai import VertexAIEmbeddings
-         return VertexAIEmbeddings(model_name="text-embedding-004")
+         return VertexAIEmbeddings(model_name="text-embedding-005")
      def get_models(self):
          #from google.cloud import aiplatform
          #aiplatform.init()
@@ -123,7 +125,7 @@ class Gvertex(LlmInterface):
          #see https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#united-states for available models
          return [
              {"id":"gemini-2.0-flash-001"},
-             {"id":"gemini-1.5-pro-001"},
+             {"id":"gemini-2.0-flash-lite-001"},
              {"id":"gemini-1.5-pro-002"}
          ]
 
@@ -131,7 +133,7 @@ class Anthropic(LlmInterface):
      def get_llm(self):
          from langchain_anthropic import ChatAnthropic
          return ChatAnthropic(
-             api_key=self.config.api_key,
+             api_key=self.config.api_key or os.getenv("ANTHROPIC_API_KEY"),
              model=self.config.model,
              temperature=self.config.temperature,
              streaming=True,
@@ -156,7 +158,7 @@ class Groq(LlmInterface):
      def get_llm(self):
          from langchain_groq import ChatGroq
          return ChatGroq(
-             api_key=self.config.api_key,
+             api_key=self.config.api_key or os.getenv("GROQ_API_KEY"),
              model=self.config.model,
              #max_tokens=8192,
              temperature=self.config.temperature,
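Every provider hunk above applies the same pattern: an explicitly configured key wins, otherwise the constructor falls back to the environment variable that `Settings` exports in `config.py`. A minimal sketch of the pattern (the helper name is invented; the shipped code inlines the expression at each call site):

```python
import os
from typing import Optional

def resolve_api_key(config_api_key: Optional[str], env_var: str) -> Optional[str]:
    # `or` treats '' and None alike, so an empty per-request key
    # also falls through to the environment
    return config_api_key or os.getenv(env_var)

assert resolve_api_key("sk-explicit", "DEEPSEEK_API_KEY") == "sk-explicit"
os.environ["DEEPSEEK_API_KEY"] = "sk-from-env"
assert resolve_api_key("", "DEEPSEEK_API_KEY") == "sk-from-env"
```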
{ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/tools/utils.py
@@ -2,10 +2,10 @@ import random, os
  from langchain_openai import ChatOpenAI
  from langchain_core.prompts import PromptTemplate
  from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
- from ws_bom_robot_app.llm.utils.print import printString
+ from ws_bom_robot_app.llm.utils.print import print_string
 
  def __print_output(data: str) -> str:
-     return printString(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else f"{data} "
+     return print_string(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else f"{data} "
 
  def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> str:
      if not waiting_messages: return ""
{ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43}/ws_bom_robot_app/llm/utils/print.py
@@ -14,16 +14,16 @@ class HiddenPrints:
          sys.stdout = self._original_stdout
          sys.stderr = self._original_stderr
 
- def printJson(data) -> str:
-     return f"{json.dumps(data, indent=2, sort_keys=True)},"
+ def print_json(data) -> str:
+     return print_single_json(data) + ","
 
- def printSingleJson(data) -> str:
-     return f"{json.dumps(data, indent=2, sort_keys=True)}"
+ def print_single_json(data) -> str:
+     return json.dumps(data, sort_keys=True)
 
- def printString(data: str) -> str:
+ def print_string(data: str) -> str:
      if data != "":
-         return printJson(data)
+         return print_json(data)
 
- def printSingleString(data: str) -> str:
+ def print_single_string(data: str) -> str:
      if data != "":
-         return printSingleJson(data)
+         return print_single_json(data)
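Besides the snake_case renames, `print_json` now emits compact JSON (the `indent=2` is gone) and delegates to `print_single_json`. Each streamed chunk still ends with a trailing comma, which is why `main.py` can reassemble a stored stream with `json.loads('[' + content[:-1] + ']')`. A runnable excerpt of the renamed helpers showing the wire format:

```python
import json

def print_single_json(data) -> str:
    return json.dumps(data, sort_keys=True)

def print_json(data) -> str:
    # trailing comma lets a consumer wrap the whole stream in '[...]'
    return print_single_json(data) + ","

print(print_json({"type": "info", "threadId": "t1"}))  # {"threadId": "t1", "type": "info"},
print(print_json("Hello"))                             # "Hello",
```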
{ws_bom_robot_app-0.0.41 → ws_bom_robot_app-0.0.43/ws_bom_robot_app.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ws_bom_robot_app
- Version: 0.0.41
+ Version: 0.0.43
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
  Home-page: https://github.com/websolutespa/bom
  Author: Websolute Spa
ws_bom_robot_app-0.0.41/ws_bom_robot_app/llm/agent_handler.py (deleted)
@@ -1,180 +0,0 @@
- from asyncio import Queue
- from langchain_core.agents import AgentFinish
- from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
- from langchain.callbacks.base import AsyncCallbackHandler
- from ws_bom_robot_app.llm.utils.print import printJson, printString
- from typing import Any, Dict, List, Optional, Union
- from uuid import UUID
- import ws_bom_robot_app.llm.settings as settings
- from langchain_core.callbacks.base import AsyncCallbackHandler
- from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
- from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
- import json
-
- # Here is a custom handler that will print the tokens to stdout.
- # Instead of printing to stdout you can send the data elsewhere; e.g., to a streaming API response
-
-
- def _parse_token(llm:str,token: str) -> str:
-     """Parses the token based on the LLM provider."""
-     if llm == "anthropic" and isinstance(token, list):
-         first = token[0]
-         if 'text' in first:
-             token = first['text']
-         else:
-             #[{'id': 'toolu_01GGLwJcrQ8PvFMUkQPGu8n7', 'input': {}, 'name': 'document_retriever_xxx', 'type': 'tool_use', 'index': 1}]
-             token = ""
-     return token
-
- class AgentHandler(AsyncCallbackHandler):
-
-     def __init__(self, queue: Queue, llm:str, threadId: str = None) -> None:
-         super().__init__()
-         self._threadId = threadId
-         self.json_block = ""
-         self.is_json_block = False
-         self.backtick_count = 0 # backtick count, for precise fence tracking
-         self.queue = queue
-         self.llm = llm
-
-     async def on_llm_start(
-         self,
-         serialized: Dict[str, Any],
-         prompts: List[str],
-         *,
-         run_id: UUID,
-         parent_run_id: UUID = None,
-         tags: List[str] = None,
-         metadata: Dict[str, Any] = None,
-         **kwargs: Any,
-     ) -> None:
-         firstChunk = {
-             "type": "info",
-             "threadId": self._threadId,
-         }
-         await self.queue.put(printString(firstChunk))
-
-     """async def on_chat_model_start(self, serialized: Dict[str, Any], messages: List[List[BaseMessage]], *, run_id: UUID = None, parent_run_id = None, tags = None, metadata = None, **kwargs: Any) -> Any:
-         pass"""
-
-     async def on_tool_end(self, output: Any, *, run_id: UUID, parent_run_id: UUID = None, tags: List[str] = None, **kwargs: Any) -> None:
-         pass
-
-     async def on_llm_new_token(
-         self,
-         token: str,
-         *,
-         chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None,
-         run_id: UUID,
-         parent_run_id: Optional[UUID] = None,
-         tags: Optional[List[str]] = None,
-         **kwargs: Any,
-     ) -> None:
-         """Handles new tokens during streaming."""
-
-         if token:
-             token = _parse_token(self.llm,token)
-             if token:
-                 self.backtick_count += token.count("`")
-
-                 if self.backtick_count >= 3:
-                     if not self.is_json_block:
-                         self.is_json_block = True
-                         self.json_block = ""
-                     else:
-                         self.is_json_block = False
-                         self.json_block += token.replace("```json", '')
-                         await self.process_json_block(self.json_block)
-                         self.json_block = ""
-                     self.backtick_count = 0
-                 elif self.is_json_block:
-                     self.json_block += token
-                 else:
-                     await self.queue.put(printString(token))
-
-     async def on_agent_finish(
-         self,
-         finish: AgentFinish,
-         *,
-         run_id: UUID,
-         parent_run_id: UUID = None,
-         tags: List[str] = None,
-         **kwargs: Any,
-     ) -> None:
-         settings.chat_history.extend(
-             [
-                 AIMessage(content=_parse_token(self.llm,finish.return_values["output"])),
-             ]
-         )
-         finalChunk = {"type": "end"}
-         await self.queue.put(printJson(finalChunk))
-         await self.queue.put(None)
-
-     async def process_json_block(self, json_block: str):
-         """Processes the complete JSON block."""
-         # remove the leading '```json' delimiter, if present, and surrounding whitespace
-         json_block_clean = json_block.replace('```', '').replace('json', '').strip()
-         # make sure the block is not empty before attempting to parse it
-         if json_block_clean:
-             try:
-                 # try to parse the JSON
-                 parsed_json = json.loads(json_block_clean)
-                 await self.queue.put(printJson(parsed_json))
-             except json.JSONDecodeError as e:
-                 # if the JSON is malformed, surface the error
-                 raise e
-
- class RawAgentHandler(AsyncCallbackHandler):
-
-     def __init__(self,queue: Queue, llm: str) -> None:
-         super().__init__()
-         self.queue = queue
-         self.llm = llm
-     async def on_llm_start(
-         self,
-         serialized: Dict[str, Any],
-         prompts: List[str],
-         *,
-         run_id: UUID,
-         parent_run_id: UUID = None,
-         tags: List[str] = None,
-         metadata: Dict[str, Any] = None,
-         **kwargs: Any,
-     ) -> None:
-         pass
-
-     """async def on_chat_model_start(self, serialized: Dict[str, Any], messages: List[List[BaseMessage]], *, run_id: UUID = None, parent_run_id = None, tags = None, metadata = None, **kwargs: Any) -> Any:
-         pass"""
-
-     async def on_tool_end(self, output: Any, *, run_id: UUID, parent_run_id: UUID = None, tags: List[str] = None, **kwargs: Any) -> None:
-         pass
-
-     async def on_llm_new_token(
-         self,
-         token: str,
-         *,
-         chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None,
-         run_id: UUID,
-         parent_run_id: Optional[UUID] = None,
-         tags: Optional[List[str]] = None,
-         **kwargs: Any,
-     ) -> None:
-         """Handles new tokens during streaming."""
-         if token: # Only process non-empty tokens
-             await self.queue.put(_parse_token(self.llm,token))
-
-     async def on_agent_finish(
-         self,
-         finish: AgentFinish,
-         *,
-         run_id: UUID,
-         parent_run_id: UUID = None,
-         tags: List[str] = None,
-         **kwargs: Any,
-     ) -> None:
-         settings.chat_history.extend(
-             [
-                 AIMessage(content=_parse_token(self.llm,finish.return_values["output"]))
-             ]
-         )
-         await self.queue.put(None)