sonika-langchain-bot 0.0.17__tar.gz → 0.0.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sonika-langchain-bot might be problematic.

Files changed (21)
  1. {sonika_langchain_bot-0.0.17/src/sonika_langchain_bot.egg-info → sonika_langchain_bot-0.0.19}/PKG-INFO +1 -1
  2. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/setup.py +1 -1
  3. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/langchain_bot_agent.py +184 -251
  4. sonika_langchain_bot-0.0.19/src/sonika_langchain_bot/langchain_clasificator.py +66 -0
  5. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19/src/sonika_langchain_bot.egg-info}/PKG-INFO +1 -1
  6. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/test/test.py +2 -2
  7. sonika_langchain_bot-0.0.17/src/sonika_langchain_bot/langchain_clasificator.py +0 -30
  8. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/LICENSE +0 -0
  9. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/README.md +0 -0
  10. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/setup.cfg +0 -0
  11. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/__init__.py +0 -0
  12. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/document_processor.py +0 -0
  13. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/langchain_class.py +0 -0
  14. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/langchain_files.py +0 -0
  15. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/langchain_models.py +0 -0
  16. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/langchain_tools.py +0 -0
  17. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot.egg-info/SOURCES.txt +0 -0
  18. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot.egg-info/dependency_links.txt +0 -0
  19. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot.egg-info/requires.txt +0 -0
  20. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot.egg-info/top_level.txt +0 -0
  21. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/test/test_document_processor.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sonika-langchain-bot
-Version: 0.0.17
+Version: 0.0.19
 Summary: Agente langchain con LLM
 Author: Erley Blanco Carvajal
 License: MIT License
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

 setup(
     name="sonika-langchain-bot",
-    version="0.0.17",
+    version="0.0.19",
     description="Agente langchain con LLM",
     author="Erley Blanco Carvajal",
     license="MIT License",
src/sonika_langchain_bot/langchain_bot_agent.py
@@ -40,7 +40,6 @@ class LangChainBot:
     - Thread-based conversation persistence
     - Streaming responses
     - Backward compatibility with legacy APIs
-    - Debug logging injection for production troubleshooting
     """

     def __init__(self,
@@ -61,7 +60,7 @@ class LangChainBot:
            tools (List[BaseTool], optional): Traditional LangChain tools to bind to the model
            mcp_servers (Dict[str, Any], optional): MCP server configurations for dynamic tool loading
            use_checkpointer (bool): Enable automatic conversation persistence using LangGraph checkpoints
-           logger (logging.Logger, optional): Logger instance for debugging. If None, uses silent NullHandler
+           logger (Optional[logging.Logger]): Logger instance for error tracking (silent by default if not provided)

        Note:
            The instructions will be automatically enhanced with tool descriptions
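The logger contract here is plain stdlib logging: pass any configured logging.Logger and the bot uses it; pass nothing and a NullHandler keeps it silent. A minimal caller-side sketch, assuming the keyword arguments named in this docstring (the language_model and embeddings objects are assumed to come from the package's own wrapper classes):

    import logging

    # Any standard logger works; without one the bot attaches a NullHandler.
    logger = logging.getLogger("sonika.bot")
    logger.setLevel(logging.ERROR)
    logger.addHandler(logging.StreamHandler())

    bot = LangChainBot(
        language_model=language_model,  # assumed: the package's ILanguageModel wrapper
        embeddings=embeddings,          # assumed: an embeddings instance
        instructions="You are a helpful assistant.",
        logger=logger,                  # omit to keep the bot silent
    )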
@@ -72,17 +71,11 @@ class LangChainBot:
         if logger is None:
             self.logger.addHandler(logging.NullHandler())

-        self.logger.info("="*80)
-        self.logger.info("🚀 Inicializando LangChainBot")
-        self.logger.info("="*80)
-
         # Core components
         self.language_model = language_model
         self.embeddings = embeddings
         self.base_instructions = instructions

-        self.logger.debug(f"📋 Instrucciones base: {len(instructions)} caracteres")
-
         # Backward compatibility attributes
         self.chat_history: List[BaseMessage] = []
         self.vector_store = None
@@ -91,39 +84,25 @@ class LangChainBot:
         self.tools = tools or []
         self.mcp_client = None

-        self.logger.info(f"🔧 Herramientas iniciales: {len(self.tools)}")
-
         # Initialize MCP servers if provided
         if mcp_servers:
-            self.logger.info(f"🌐 Servidores MCP detectados: {len(mcp_servers)}")
             self._initialize_mcp(mcp_servers)
-        else:
-            self.logger.debug("⚪ Sin servidores MCP configurados")

         # Configure persistence layer
         self.checkpointer = MemorySaver() if use_checkpointer else None
-        self.logger.debug(f"💾 Checkpointer: {'Habilitado' if use_checkpointer else 'Deshabilitado'}")

         # Prepare model with bound tools for native function calling
-        self.logger.info("🤖 Preparando modelo con herramientas...")
         self.model_with_tools = self._prepare_model_with_tools()

         # Build modern instruction set with tool descriptions
-        self.logger.info("📝 Construyendo instrucciones modernas...")
         self.instructions = self._build_modern_instructions()
-        self.logger.debug(f"📋 Instrucciones finales: {len(self.instructions)} caracteres")

         # Create the LangGraph workflow
-        self.logger.info("🔄 Creando workflow de LangGraph...")
         self.graph = self._create_modern_workflow()

         # Legacy compatibility attributes (maintained for API compatibility)
         self.conversation = None
         self.agent_executor = None
-
-        self.logger.info("✅ LangChainBot inicializado correctamente")
-        self.logger.info(f"📊 Resumen: {len(self.tools)} herramientas, {len(self.chat_history)} mensajes en historial")
-        self.logger.info("="*80 + "\n")

     def _initialize_mcp(self, mcp_servers: Dict[str, Any]):
         """
@@ -146,81 +125,14 @@ class LangChainBot:
         MCP tools are automatically appended to the existing tools list and
         will be included in the model's tool binding process.
         """
-        self.logger.info("="*80)
-        self.logger.info("🌐 INICIALIZANDO MCP (Model Context Protocol)")
-        self.logger.info("="*80)
-
         try:
-            self.logger.info(f"📋 Servidores a inicializar: {len(mcp_servers)}")
-
-            for server_name, server_config in mcp_servers.items():
-                self.logger.info(f"\n🔌 Servidor: {server_name}")
-                self.logger.debug(f"   Command: {server_config.get('command')}")
-                self.logger.debug(f"   Args: {server_config.get('args')}")
-                self.logger.debug(f"   Transport: {server_config.get('transport')}")
-
-            self.logger.info("\n🔄 Creando MultiServerMCPClient...")
             self.mcp_client = MultiServerMCPClient(mcp_servers)
-            self.logger.info("✅ MultiServerMCPClient creado")
-
-            # ===== FIX PARA APACHE/MOD_WSGI =====
-            self.logger.info("🔧 Aplicando fix para compatibilidad Apache/mod_wsgi...")
-
-            import subprocess
-            original_create = asyncio.create_subprocess_exec
-
-            async def fixed_create(*args, stdin=None, stdout=None, stderr=None, **kwargs):
-                """Forzar PIPE para evitar heredar sys.stderr de Apache"""
-                return await original_create(
-                    *args,
-                    stdin=stdin or subprocess.PIPE,
-                    stdout=stdout or subprocess.PIPE,
-                    stderr=stderr or subprocess.PIPE,
-                    **kwargs
-                )
-
-            # Aplicar parche temporalmente
-            asyncio.create_subprocess_exec = fixed_create
-            self.logger.debug("✅ Parche temporal aplicado a asyncio.create_subprocess_exec")
-
-            try:
-                self.logger.info("🔄 Obteniendo herramientas desde servidores MCP...")
-                mcp_tools = asyncio.run(self.mcp_client.get_tools())
-                self.logger.info(f"📥 Herramientas MCP recibidas: {len(mcp_tools)}")
-            finally:
-                # Restaurar original
-                asyncio.create_subprocess_exec = original_create
-                self.logger.debug("✅ Parche temporal removido, asyncio restaurado")
-            # =====================================
-
-            if mcp_tools:
-                for i, tool in enumerate(mcp_tools, 1):
-                    tool_name = getattr(tool, 'name', 'Unknown')
-                    tool_desc = getattr(tool, 'description', 'Sin descripción')
-                    self.logger.debug(f"   {i}. {tool_name}: {tool_desc[:100]}...")
-
+            mcp_tools = asyncio.run(self.mcp_client.get_tools())
             self.tools.extend(mcp_tools)
-
-            self.logger.info(f"✅ MCP inicializado exitosamente")
-            self.logger.info(f"📊 Total herramientas disponibles: {len(self.tools)}")
-            self.logger.info(f"   - Herramientas MCP: {len(mcp_tools)}")
-            self.logger.info(f"   - Herramientas previas: {len(self.tools) - len(mcp_tools)}")
-            self.logger.info("="*80 + "\n")
-
         except Exception as e:
-            self.logger.error("="*80)
-            self.logger.error("❌ ERROR EN INICIALIZACIÓN MCP")
-            self.logger.error("="*80)
-            self.logger.error(f"Tipo de error: {type(e).__name__}")
-            self.logger.error(f"Mensaje: {str(e)}")
+            self.logger.error(f"Error inicializando MCP: {e}")
             self.logger.exception("Traceback completo:")
-            self.logger.error("="*80 + "\n")
-
             self.mcp_client = None
-
-            # Mensaje de diagnóstico
-            self.logger.warning("⚠️ Continuando sin MCP - solo herramientas locales disponibles")
-            self.logger.warning(f"   Herramientas disponibles: {len(self.tools)}")

    def _prepare_model_with_tools(self):
        """
@@ -233,31 +145,13 @@ class LangChainBot:
            The language model with tools bound, or the original model if no tools are available
        """
        if self.tools:
-            self.logger.info(f"🔗 Vinculando {len(self.tools)} herramientas al modelo")
-            try:
-                bound_model = self.language_model.model.bind_tools(self.tools)
-                self.logger.info("✅ Herramientas vinculadas correctamente")
-                return bound_model
-            except Exception as e:
-                self.logger.error(f"❌ Error vinculando herramientas: {e}")
-                self.logger.exception("Traceback:")
-                return self.language_model.model
-        else:
-            self.logger.debug("⚪ Sin herramientas para vincular, usando modelo base")
-            return self.language_model.model
+            return self.language_model.model.bind_tools(self.tools)
+        return self.language_model.model

    def _build_modern_instructions(self) -> str:
-        """
-        Build modern instructions with automatic tool documentation.
-
-        Returns:
-            str: Enhanced instructions with tool descriptions
-        """
        instructions = self.base_instructions

        if self.tools:
-            self.logger.info(f"📝 Generando documentación para {len(self.tools)} herramientas")
-
            tools_description = "\n\n# Available Tools\n\n"

            for tool in self.tools:
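bind_tools is the standard LangChain chat-model API for native function calling, which is all the simplified method above now forwards to. A self-contained sketch of the mechanism, assuming the langchain-core @tool decorator and any chat model that supports tool binding:

    from langchain_core.tools import tool

    @tool
    def get_weather(city: str) -> str:
        """Return a short weather description for a city."""
        return f"Sunny in {city}"

    # model is assumed to be any chat model supporting tool calling;
    # bind_tools attaches each tool's JSON schema to every request.
    model_with_tools = model.bind_tools([get_weather])
    response = model_with_tools.invoke("What is the weather in Bogota?")
    print(response.tool_calls)  # e.g. [{'name': 'get_weather', 'args': {'city': 'Bogota'}, ...}]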
@@ -271,7 +165,7 @@ class LangChainBot:
                    required = "**REQUIRED**" if field_info.is_required() else "*optional*"
                    tools_description += f"- `{field_name}` ({field_info.annotation.__name__}, {required}): {field_info.description}\n"

-            # Opción 2: args_schema es un dict (MCP Tools)
+            # Opción 2: args_schema es un dict (MCP Tools) ← NUEVO
            elif hasattr(tool, 'args_schema') and isinstance(tool.args_schema, dict):
                if 'properties' in tool.args_schema:
                    tools_description += f"**Parameters:**\n"
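Given these format strings, the parameter block appended to the system prompt looks roughly like this for a single tool (illustrative only; the per-tool name and description lines are emitted by unchanged code outside this hunk, and the field names here are hypothetical):

    **Parameters:**
    - `city` (str, **REQUIRED**): Name of the city to look up
    - `units` (str, *optional*): Temperature units, e.g. "metric"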
@@ -301,7 +195,6 @@ class LangChainBot:
                "- Do NOT call tools with empty arguments\n")

            instructions += tools_description
-            self.logger.info(f"✅ Documentación de herramientas agregada ({len(tools_description)} caracteres)")

        return instructions

@@ -318,14 +211,24 @@ class LangChainBot:
        Returns:
            StateGraph: Compiled LangGraph workflow ready for execution
        """
-        self.logger.info("🔄 Construyendo workflow de LangGraph")

        def agent_node(state: ChatState) -> ChatState:
            """
            Main agent node responsible for generating responses and initiating tool calls.
-            """
-            self.logger.debug("🤖 Ejecutando agent_node")

+            This node:
+            1. Extracts the latest user message from the conversation state
+            2. Retrieves relevant context from processed files
+            3. Constructs a complete message history for the model
+            4. Invokes the model with tool binding for native function calling
+            5. Returns updated state with the model's response
+
+            Args:
+                state (ChatState): Current conversation state
+
+            Returns:
+                ChatState: Updated state with agent response
+            """
            # Extract the most recent user message
            last_user_message = None
            for msg in reversed(state["messages"]):
@@ -334,15 +237,10 @@ class LangChainBot:
                    break

            if not last_user_message:
-                self.logger.warning("⚠️ No se encontró mensaje de usuario")
                return state

-            self.logger.debug(f"💬 Mensaje usuario: {last_user_message[:100]}...")
-
            # Retrieve contextual information from processed files
            context = self._get_context(last_user_message)
-            if context:
-                self.logger.debug(f"📚 Contexto recuperado: {len(context)} caracteres")

            # Build system prompt with optional context
            system_content = self.instructions
@@ -359,33 +257,24 @@ class LangChainBot:
                elif isinstance(msg, AIMessage):
                    messages.append({"role": "assistant", "content": msg.content or ""})
                elif isinstance(msg, ToolMessage):
+                    # Convert tool results to user messages for context
                    messages.append({"role": "user", "content": f"Tool result: {msg.content}"})

-            self.logger.debug(f"📨 Enviando {len(messages)} mensajes al modelo")
-
            try:
                # Invoke model with native tool binding
                response = self.model_with_tools.invoke(messages)

-                self.logger.debug(f"✅ Respuesta recibida del modelo")
-
-                # Check for tool calls
-                if hasattr(response, 'tool_calls') and response.tool_calls:
-                    self.logger.info(f"🔧 Llamadas a herramientas detectadas: {len(response.tool_calls)}")
-                    for i, tc in enumerate(response.tool_calls, 1):
-                        tool_name = tc.get('name', 'Unknown')
-                        self.logger.debug(f"   {i}. {tool_name}")
-
                # Return updated state
                return {
                    **state,
                    "context": context,
-                    "messages": [response]
+                    "messages": [response]  # add_messages annotation handles proper appending
                }

            except Exception as e:
-                self.logger.error(f"Error en agent_node: {e}")
-                self.logger.exception("Traceback:")
+                self.logger.error(f"Error en agent_node: {e}")
+                self.logger.exception("Traceback completo:")
+                # Graceful fallback for error scenarios
                fallback_response = AIMessage(content="I apologize, but I encountered an error processing your request.")
                return {
                    **state,
@@ -396,16 +285,24 @@ class LangChainBot:
        def should_continue(state: ChatState) -> str:
            """
            Conditional edge function to determine workflow continuation.
+
+            Analyzes the last message to decide whether to execute tools or end the workflow.
+            This leverages LangGraph's native tool calling detection.
+
+            Args:
+                state (ChatState): Current conversation state
+
+            Returns:
+                str: Next node to execute ("tools" or "end")
            """
            last_message = state["messages"][-1]

+            # Check for pending tool calls using native tool calling detection
            if (isinstance(last_message, AIMessage) and
                hasattr(last_message, 'tool_calls') and
                last_message.tool_calls):
-                self.logger.debug("➡️ Continuando a ejecución de herramientas")
                return "tools"

-            self.logger.debug("🏁 Finalizando workflow")
            return "end"

        # Construct the workflow graph
@@ -413,18 +310,18 @@ class LangChainBot:

        # Add primary agent node
        workflow.add_node("agent", agent_node)
-        self.logger.debug("✅ Nodo 'agent' agregado")

        # Add tool execution node if tools are available
        if self.tools:
+            # ToolNode automatically handles tool execution and result formatting
            tool_node = ToolNode(self.tools)
            workflow.add_node("tools", tool_node)
-            self.logger.debug("✅ Nodo 'tools' agregado")

        # Define workflow edges and entry point
        workflow.set_entry_point("agent")

        if self.tools:
+            # Conditional routing based on tool call presence
            workflow.add_conditional_edges(
                "agent",
                should_continue,
@@ -433,21 +330,17 @@ class LangChainBot:
                    "end": END
                }
            )
+            # Return to agent after tool execution for final response formatting
            workflow.add_edge("tools", "agent")
-            self.logger.debug("✅ Edges condicionales configurados")
        else:
+            # Direct termination if no tools are available
            workflow.add_edge("agent", END)
-            self.logger.debug("✅ Edge directo a END configurado")

        # Compile workflow with optional checkpointing
        if self.checkpointer:
-            compiled = workflow.compile(checkpointer=self.checkpointer)
-            self.logger.info("✅ Workflow compilado con checkpointer")
+            return workflow.compile(checkpointer=self.checkpointer)
        else:
-            compiled = workflow.compile()
-            self.logger.info("✅ Workflow compilado sin checkpointer")
-
-        return compiled
+            return workflow.compile()

    # ===== LEGACY API COMPATIBILITY =====

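Stripped of the bot's internals, the graph assembled above is the canonical LangGraph agent/tools loop. A standalone sketch of the same topology, assuming langgraph's StateGraph, ToolNode, and a message-list state (model_with_tools and get_weather as in the earlier sketches):

    from typing import Annotated, TypedDict
    from langgraph.graph import StateGraph, END
    from langgraph.graph.message import add_messages
    from langgraph.prebuilt import ToolNode

    class State(TypedDict):
        messages: Annotated[list, add_messages]

    def agent(state: State) -> State:
        # One model call per turn; any tool calls ride along on the AIMessage
        return {"messages": [model_with_tools.invoke(state["messages"])]}

    def route(state: State) -> str:
        last = state["messages"][-1]
        return "tools" if getattr(last, "tool_calls", None) else "end"

    graph = StateGraph(State)
    graph.add_node("agent", agent)
    graph.add_node("tools", ToolNode([get_weather]))
    graph.set_entry_point("agent")
    graph.add_conditional_edges("agent", route, {"tools": "tools", "end": END})
    graph.add_edge("tools", "agent")  # tool results loop back for a final answer
    app = graph.compile()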
@@ -471,70 +364,59 @@ class LangChainBot:
        This method automatically handles tool execution and context integration
        from processed files while maintaining the original API signature.
        """
-        self.logger.info("="*80)
-        self.logger.info("📨 GET_RESPONSE llamado")
-        self.logger.debug(f"💬 Input: {user_input[:200]}...")
-
        # Prepare initial workflow state
        initial_state = {
            "messages": self.chat_history + [HumanMessage(content=user_input)],
            "context": ""
        }

-        self.logger.debug(f"📊 Estado inicial: {len(initial_state['messages'])} mensajes")
-
-        try:
-            # Execute the LangGraph workflow
-            self.logger.info("🔄 Ejecutando workflow...")
-            result = asyncio.run(self.graph.ainvoke(initial_state))
-            self.logger.info("✅ Workflow completado")
-
-            # Update internal conversation history
-            self.chat_history = result["messages"]
-            self.logger.debug(f"💾 Historial actualizado: {len(self.chat_history)} mensajes")
-
-            # Extract final response from the last assistant message
-            final_response = ""
-            total_input_tokens = 0
-            total_output_tokens = 0
-
-            for msg in reversed(result["messages"]):
-                if isinstance(msg, AIMessage) and msg.content:
-                    final_response = msg.content
-                    break
-
-            # Extract token usage from response metadata
-            last_message = result["messages"][-1]
-            if hasattr(last_message, 'response_metadata'):
-                token_usage = last_message.response_metadata.get('token_usage', {})
-                total_input_tokens = token_usage.get('prompt_tokens', 0)
-                total_output_tokens = token_usage.get('completion_tokens', 0)
-
-            self.logger.info(f"📊 Tokens: input={total_input_tokens}, output={total_output_tokens}")
-            self.logger.info(f"📝 Respuesta: {len(final_response)} caracteres")
-            self.logger.info("="*80 + "\n")
-
-            return ResponseModel(
-                user_tokens=total_input_tokens,
-                bot_tokens=total_output_tokens,
-                response=final_response
-            )
-
-        except Exception as e:
-            self.logger.error("="*80)
-            self.logger.error("❌ ERROR EN GET_RESPONSE")
-            self.logger.error(f"Mensaje: {str(e)}")
-            self.logger.exception("Traceback:")
-            self.logger.error("="*80 + "\n")
-            raise
+        # Execute the LangGraph workflow
+        # Siempre usar ainvoke (funciona para ambos casos)
+        result = asyncio.run(self.graph.ainvoke(initial_state))
+
+        # Update internal conversation history
+        self.chat_history = result["messages"]
+
+        # Extract final response from the last assistant message
+        final_response = ""
+        total_input_tokens = 0
+        total_output_tokens = 0
+
+        for msg in reversed(result["messages"]):
+            if isinstance(msg, AIMessage) and msg.content:
+                final_response = msg.content
+                break
+
+        # Extract token usage from response metadata
+        last_message = result["messages"][-1]
+        if hasattr(last_message, 'response_metadata'):
+            token_usage = last_message.response_metadata.get('token_usage', {})
+            total_input_tokens = token_usage.get('prompt_tokens', 0)
+            total_output_tokens = token_usage.get('completion_tokens', 0)
+
+        return ResponseModel(
+            user_tokens=total_input_tokens,
+            bot_tokens=total_output_tokens,
+            response=final_response
+        )

    def get_response_stream(self, user_input: str) -> Generator[str, None, None]:
        """
        Generate a streaming response for real-time user interaction.
-        """
-        self.logger.info("📨 GET_RESPONSE_STREAM llamado")
-        self.logger.debug(f"💬 Input: {user_input[:200]}...")

+        This method provides streaming capabilities while maintaining backward
+        compatibility with the original API.
+
+        Args:
+            user_input (str): The user's message or query
+
+        Yields:
+            str: Response chunks as they are generated
+
+        Note:
+            Current implementation streams complete responses. For token-level
+            streaming, consider using the model's native streaming capabilities.
+        """
        initial_state = {
            "messages": self.chat_history + [HumanMessage(content=user_input)],
            "context": ""
@@ -542,105 +424,156 @@ class LangChainBot:
        }

        accumulated_response = ""

-        try:
-            for chunk in self.graph.stream(initial_state):
-                if "agent" in chunk:
-                    for message in chunk["agent"]["messages"]:
-                        if isinstance(message, AIMessage) and message.content:
-                            accumulated_response = message.content
-                            yield message.content
-
-            if accumulated_response:
-                self.chat_history.extend([
-                    HumanMessage(content=user_input),
-                    AIMessage(content=accumulated_response)
-                ])
-
-            self.logger.info(f"✅ Stream completado: {len(accumulated_response)} caracteres")
-
-        except Exception as e:
-            self.logger.error(f"❌ Error en stream: {e}")
-            self.logger.exception("Traceback:")
-            raise
+        # Stream workflow execution
+        for chunk in self.graph.stream(initial_state):
+            # Extract content from workflow chunks
+            if "agent" in chunk:
+                for message in chunk["agent"]["messages"]:
+                    if isinstance(message, AIMessage) and message.content:
+                        # Stream complete responses (can be enhanced for token-level streaming)
+                        accumulated_response = message.content
+                        yield message.content
+
+        # Update conversation history after streaming completion
+        if accumulated_response:
+            self.chat_history.extend([
+                HumanMessage(content=user_input),
+                AIMessage(content=accumulated_response)
+            ])

    def load_conversation_history(self, messages: List[Message]):
        """
        Load conversation history from Django model instances.
+
+        This method maintains compatibility with existing Django-based conversation
+        storage while preparing the history for modern LangGraph processing.
+
+        Args:
+            messages (List[Message]): List of Django Message model instances
+                Expected to have 'content' and 'is_bot' attributes
        """
-        self.logger.info(f"📥 Cargando historial: {len(messages)} mensajes")
        self.chat_history.clear()
        for message in messages:
            if message.is_bot:
                self.chat_history.append(AIMessage(content=message.content))
            else:
                self.chat_history.append(HumanMessage(content=message.content))
-        self.logger.debug("✅ Historial cargado")

    def save_messages(self, user_message: str, bot_response: str):
        """
        Save messages to internal conversation history.
+
+        This method provides backward compatibility for manual history management.
+
+        Args:
+            user_message (str): The user's input message
+            bot_response (str): The bot's generated response
        """
-        self.logger.debug("💾 Guardando mensajes en historial interno")
        self.chat_history.append(HumanMessage(content=user_message))
        self.chat_history.append(AIMessage(content=bot_response))

    def process_file(self, file: FileProcessorInterface):
        """
        Process and index a file for contextual retrieval.
-        """
-        self.logger.info("📄 Procesando archivo para indexación")
-        try:
-            document = file.getText()
-            text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-            texts = text_splitter.split_documents(document)
+
+        This method maintains compatibility with existing file processing workflows
+        while leveraging FAISS for efficient similarity search.
+
+        Args:
+            file (FileProcessorInterface): File processor instance that implements getText()

-            self.logger.debug(f"✂️ Documento dividido en {len(texts)} chunks")
+        Note:
+            Processed files are automatically available for context retrieval
+            in subsequent conversations without additional configuration.
+        """
+        document = file.getText()
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+        texts = text_splitter.split_documents(document)

-            if self.vector_store is None:
-                self.vector_store = FAISS.from_texts(
-                    [doc.page_content for doc in texts],
-                    self.embeddings
-                )
-                self.logger.info("✅ Vector store creado")
-            else:
-                self.vector_store.add_texts([doc.page_content for doc in texts])
-                self.logger.info("✅ Textos agregados a vector store existente")
-
-        except Exception as e:
-            self.logger.error(f"❌ Error procesando archivo: {e}")
-            self.logger.exception("Traceback:")
-            raise
+        if self.vector_store is None:
+            self.vector_store = FAISS.from_texts(
+                [doc.page_content for doc in texts],
+                self.embeddings
+            )
+        else:
+            self.vector_store.add_texts([doc.page_content for doc in texts])

    def clear_memory(self):
        """
        Clear conversation history and processed file context.
+
+        This method resets the bot to a clean state, removing all conversation
+        history and processed file context.
        """
-        self.logger.info("🗑️ Limpiando memoria")
        self.chat_history.clear()
        self.vector_store = None
-        self.logger.debug("✅ Memoria limpiada")

    def get_chat_history(self) -> List[BaseMessage]:
        """
        Retrieve a copy of the current conversation history.
+
+        Returns:
+            List[BaseMessage]: Copy of the conversation history
        """
        return self.chat_history.copy()

    def set_chat_history(self, history: List[BaseMessage]):
        """
        Set the conversation history from a list of BaseMessage instances.
+
+        Args:
+            history (List[BaseMessage]): New conversation history to set
        """
-        self.logger.info(f"📝 Estableciendo historial: {len(history)} mensajes")
        self.chat_history = history.copy()

    def _get_context(self, query: str) -> str:
        """
        Retrieve relevant context from processed files using similarity search.
+
+        This method performs semantic search over processed file content to find
+        the most relevant information for the current query.
+
+        Args:
+            query (str): The query to search for relevant context
+
+        Returns:
+            str: Concatenated relevant context from processed files
        """
        if self.vector_store:
-            self.logger.debug(f"🔍 Buscando contexto para query: {query[:100]}...")
            docs = self.vector_store.similarity_search(query, k=4)
-            context = "\n".join([doc.page_content for doc in docs])
-            self.logger.debug(f"✅ Contexto encontrado: {len(context)} caracteres")
-            return context
+            return "\n".join([doc.page_content for doc in docs])
+        return ""
+
+    def process_file(self, file: FileProcessorInterface):
+        """API original - Procesa archivo y lo añade al vector store"""
+        document = file.getText()
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+        texts = text_splitter.split_documents(document)
+
+        if self.vector_store is None:
+            self.vector_store = FAISS.from_texts(
+                [doc.page_content for doc in texts],
+                self.embeddings
+            )
+        else:
+            self.vector_store.add_texts([doc.page_content for doc in texts])
+
+    def clear_memory(self):
+        """API original - Limpia la memoria de conversación"""
+        self.chat_history.clear()
+        self.vector_store = None
+
+    def get_chat_history(self) -> List[BaseMessage]:
+        """API original - Obtiene el historial completo"""
+        return self.chat_history.copy()
+
+    def set_chat_history(self, history: List[BaseMessage]):
+        """API original - Establece el historial de conversación"""
+        self.chat_history = history.copy()
+
+    def _get_context(self, query: str) -> str:
+        """Obtiene contexto relevante de archivos procesados"""
+        if self.vector_store:
+            docs = self.vector_store.similarity_search(query, k=4)
+            return "\n".join([doc.page_content for doc in docs])
        return ""
sonika_langchain_bot-0.0.19/src/sonika_langchain_bot/langchain_clasificator.py (new file)
@@ -0,0 +1,66 @@
+from pydantic import BaseModel
+from typing import Dict, Any, Type
+from sonika_langchain_bot.langchain_class import ILanguageModel
+
+class ClassificationResponse(BaseModel):
+    """Respuesta de clasificación con tokens utilizados"""
+    input_tokens: int
+    output_tokens: int
+    result: Dict[str, Any]
+
+class TextClassifier:
+    def __init__(self, validation_class: Type[BaseModel], llm: ILanguageModel):
+        self.llm = llm
+        self.validation_class = validation_class
+        # Guardamos ambas versiones del modelo
+        self.original_model = self.llm.model  # Sin structured output
+        self.structured_model = self.llm.model.with_structured_output(validation_class)
+
+    def classify(self, text: str) -> ClassificationResponse:
+        """
+        Clasifica el texto según la clase de validación.
+
+        Args:
+            text: Texto a clasificar
+
+        Returns:
+            ClassificationResponse: Objeto con result, input_tokens y output_tokens
+        """
+        prompt = f"""
+        Classify the following text based on the properties defined in the validation class.
+
+        Text: {text}
+
+        Only extract the properties mentioned in the validation class.
+        """
+
+        # Primero invocamos el modelo ORIGINAL para obtener metadata de tokens
+        raw_response = self.original_model.invoke(prompt)
+
+        # Extraer información de tokens del AIMessage original
+        input_tokens = 0
+        output_tokens = 0
+
+        if hasattr(raw_response, 'response_metadata'):
+            token_usage = raw_response.response_metadata.get('token_usage', {})
+            input_tokens = token_usage.get('prompt_tokens', 0)
+            output_tokens = token_usage.get('completion_tokens', 0)
+
+        # Ahora invocamos con structured output para obtener el objeto parseado
+        response = self.structured_model.invoke(prompt)
+
+        # Validar que el response es de la clase correcta
+        if isinstance(response, self.validation_class):
+            # Crear el resultado dinámicamente basado en los atributos
+            result_data = {
+                field: getattr(response, field)
+                for field in self.validation_class.__fields__.keys()
+            }
+
+            return ClassificationResponse(
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                result=result_data
+            )
+        else:
+            raise ValueError(f"The response is not of type '{self.validation_class.__name__}'")
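The new classifier is driven entirely by the pydantic class handed to it. A usage sketch, assuming an ILanguageModel implementation from the package (the llm wiring itself is hypothetical):

    from pydantic import BaseModel, Field

    class SentimentLabel(BaseModel):
        sentiment: str = Field(description="positive, negative or neutral")
        confidence: float = Field(description="confidence score between 0 and 1")

    classifier = TextClassifier(validation_class=SentimentLabel, llm=llm)  # llm: ILanguageModel
    resp = classifier.classify("how are you?")
    print(resp.result)                            # {'sentiment': ..., 'confidence': ...}
    print(resp.input_tokens, resp.output_tokens)

Note the trade-off visible in the code: classify sends the prompt twice, once through the raw model to read token_usage metadata and once through the structured model for parsing, so the reported counts cover only the first call and actual total usage is roughly double.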
src/sonika_langchain_bot.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sonika-langchain-bot
-Version: 0.0.17
+Version: 0.0.19
 Summary: Agente langchain con LLM
 Author: Erley Blanco Carvajal
 License: MIT License
test/test.py
@@ -76,6 +76,6 @@ def clasification():
    result = classifier.classify("how are you?")
    print(result)

-bot_bdi()
+#bot_bdi()
 #bot_bdi_streaming()
-#clasification()
+clasification()
sonika_langchain_bot-0.0.17/src/sonika_langchain_bot/langchain_clasificator.py (deleted)
@@ -1,30 +0,0 @@
-from pydantic import BaseModel
-from typing import Dict, Any, Type
-from sonika_langchain_bot.langchain_class import ILanguageModel
-
-# Clase para realizar la clasificación de texto
-class TextClassifier:
-    def __init__(self, validation_class: Type[BaseModel], llm: ILanguageModel):
-        self.llm =llm
-        self.validation_class = validation_class
-        #configuramos el modelo para que tenga una estructura de salida
-        self.llm.model = self.llm.model.with_structured_output(validation_class)
-
-    def classify(self, text: str) -> Dict[str, Any]:
-        # Crear el template del prompt
-        prompt = f"""
-        Classify the following text based on the properties defined in the validation class.
-
-        Text: {text}
-
-        Only extract the properties mentioned in the validation class.
-        """
-        response = self.llm.invoke(prompt=prompt)
-
-        # Asegurarse de que el `response` es de la clase de validación proporcionada
-        if isinstance(response, self.validation_class):
-            # Crear el resultado dinámicamente basado en los atributos de la clase de validación
-            result = {field: getattr(response, field) for field in self.validation_class.__fields__.keys()}
-            return result
-        else:
-            raise ValueError(f"The response is not of type '{self.validation_class.__name__}'")