sonika-langchain-bot 0.0.17__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sonika-langchain-bot might be problematic.
@@ -1,4 +1,4 @@
- from typing import Generator, List, Optional, Dict, Any, TypedDict, Annotated
+ from typing import Generator, List, Optional, Dict, Any, TypedDict, Annotated, Callable
  import asyncio
  import logging
  from langchain.schema import AIMessage, HumanMessage, BaseMessage
@@ -6,6 +6,7 @@ from langchain_core.messages import ToolMessage
  from langchain.text_splitter import CharacterTextSplitter
  from langchain_community.vectorstores import FAISS
  from langchain_community.tools import BaseTool
+ from langchain.callbacks.base import BaseCallbackHandler
  from langgraph.graph import StateGraph, END, add_messages
  from langgraph.prebuilt import ToolNode
  from langgraph.checkpoint.memory import MemorySaver
@@ -25,6 +26,101 @@ class ChatState(TypedDict):
      messages: Annotated[List[BaseMessage], add_messages]
      context: str

+ class _InternalToolLogger(BaseCallbackHandler):
+     """
+     Internal callback handler that bridges LangChain callbacks to user-provided functions.
+
+     This class is used internally to forward tool execution events to the optional
+     callback functions provided by the user during bot initialization.
+     """
+
+     def __init__(self,
+                  on_start: Optional[Callable[[str, str], None]] = None,
+                  on_end: Optional[Callable[[str, str], None]] = None,
+                  on_error: Optional[Callable[[str, str], None]] = None):
+         """
+         Initialize the internal tool logger.
+
+         Args:
+             on_start: Optional callback function called when a tool starts execution
+             on_end: Optional callback function called when a tool completes successfully
+             on_error: Optional callback function called when a tool encounters an error
+         """
+         super().__init__()
+         self.on_start_callback = on_start
+         self.on_end_callback = on_end
+         self.on_error_callback = on_error
+         self.current_tool_name = None
+         self.tool_executions = []  # For internal tracking if needed
+
+     def on_tool_start(self, serialized: Dict[str, Any], input_str: str, **kwargs) -> None:
+         """Called when a tool starts executing."""
+         tool_name = serialized.get("name", "unknown")
+         self.current_tool_name = tool_name
+
+         # Track execution internally
+         self.tool_executions.append({
+             "tool": tool_name,
+             "input": input_str,
+             "status": "started"
+         })
+
+         # Call user's callback if provided
+         if self.on_start_callback:
+             try:
+                 self.on_start_callback(tool_name, input_str)
+             except Exception as e:
+                 # Don't let user callback errors break the workflow
+                 logging.error(f"Error in on_tool_start callback: {e}")
+
+     def on_tool_end(self, output: str, **kwargs) -> None:
+         """Called when a tool completes successfully."""
+         tool_name = self.current_tool_name or "unknown"
+
+         # Convert output to string if it's a ToolMessage or other object
+         if hasattr(output, 'content'):
+             output_str = output.content
+         elif isinstance(output, str):
+             output_str = output
+         else:
+             output_str = str(output)
+
+         # Update internal tracking
+         if self.tool_executions:
+             self.tool_executions[-1]["status"] = "success"
+             self.tool_executions[-1]["output"] = output_str
+
+         # Call user's callback if provided
+         if self.on_end_callback:
+             try:
+                 self.on_end_callback(tool_name, output_str)
+             except Exception as e:
+                 logging.error(f"Error in on_tool_end callback: {e}")
+
+         self.current_tool_name = None
+
+     def on_tool_error(self, error: Exception, **kwargs) -> None:
+         """Called when a tool raises an exception."""
+         tool_name = self.current_tool_name or "unknown"
+         error_message = str(error)
+
+         # Update internal tracking
+         if self.tool_executions:
+             self.tool_executions[-1]["status"] = "error"
+             self.tool_executions[-1]["error"] = error_message
+
+         # Call user's callback if provided
+         if self.on_error_callback:
+             try:
+                 self.on_error_callback(tool_name, error_message)
+             except Exception as e:
+                 logging.error(f"Error in on_tool_error callback: {e}")
+
+         self.current_tool_name = None

  class LangChainBot:
      """
@@ -39,8 +135,8 @@ class LangChainBot:
      - File processing with vector search
      - Thread-based conversation persistence
      - Streaming responses
+     - Tool execution callbacks for real-time monitoring
      - Backward compatibility with legacy APIs
-     - Debug logging injection for production troubleshooting
      """

      def __init__(self,
@@ -50,9 +146,12 @@ class LangChainBot:
                   tools: Optional[List[BaseTool]] = None,
                   mcp_servers: Optional[Dict[str, Any]] = None,
                   use_checkpointer: bool = False,
-                  logger: Optional[logging.Logger] = None):
+                  logger: Optional[logging.Logger] = None,
+                  on_tool_start: Optional[Callable[[str, str], None]] = None,
+                  on_tool_end: Optional[Callable[[str, str], None]] = None,
+                  on_tool_error: Optional[Callable[[str, str], None]] = None):
          """
-         Initialize the modern LangGraph bot with optional MCP support.
+         Initialize the modern LangGraph bot with optional MCP support and tool execution callbacks.

          Args:
              language_model (ILanguageModel): The language model to use for generation
@@ -61,28 +160,41 @@ class LangChainBot:
              tools (List[BaseTool], optional): Traditional LangChain tools to bind to the model
              mcp_servers (Dict[str, Any], optional): MCP server configurations for dynamic tool loading
              use_checkpointer (bool): Enable automatic conversation persistence using LangGraph checkpoints
-             logger (logging.Logger, optional): Logger instance for debugging. If None, uses silent NullHandler
+             logger (Optional[logging.Logger]): Logger instance for error tracking (silent by default if not provided)
+             on_tool_start (Callable[[str, str], None], optional): Callback function executed when a tool starts.
+                 Receives (tool_name: str, input_data: str)
+             on_tool_end (Callable[[str, str], None], optional): Callback function executed when a tool completes successfully.
+                 Receives (tool_name: str, output: str)
+             on_tool_error (Callable[[str, str], None], optional): Callback function executed when a tool fails.
+                 Receives (tool_name: str, error_message: str)

          Note:
              The instructions will be automatically enhanced with tool descriptions
              when tools are provided, eliminating the need for manual tool instruction formatting.
+
+         Example:
+             ```python
+             def on_tool_execution(tool_name: str, input_data: str):
+                 print(f"Tool {tool_name} started with input: {input_data}")
+
+             bot = LangChainBot(
+                 language_model=model,
+                 embeddings=embeddings,
+                 instructions="You are a helpful assistant",
+                 on_tool_start=on_tool_execution
+             )
+             ```
          """
          # Configure logger (silent by default if not provided)
          self.logger = logger or logging.getLogger(__name__)
          if logger is None:
              self.logger.addHandler(logging.NullHandler())

-         self.logger.info("="*80)
-         self.logger.info("🚀 Inicializando LangChainBot")
-         self.logger.info("="*80)
-
          # Core components
          self.language_model = language_model
          self.embeddings = embeddings
          self.base_instructions = instructions

-         self.logger.debug(f"📋 Instrucciones base: {len(instructions)} caracteres")
-
          # Backward compatibility attributes
          self.chat_history: List[BaseMessage] = []
          self.vector_store = None
91
203
  self.tools = tools or []
92
204
  self.mcp_client = None
93
205
 
94
- self.logger.info(f"🔧 Herramientas iniciales: {len(self.tools)}")
206
+ # Tool execution callbacks
207
+ self.on_tool_start = on_tool_start
208
+ self.on_tool_end = on_tool_end
209
+ self.on_tool_error = on_tool_error
95
210
 
96
211
  # Initialize MCP servers if provided
97
212
  if mcp_servers:
98
- self.logger.info(f"🌐 Servidores MCP detectados: {len(mcp_servers)}")
99
213
  self._initialize_mcp(mcp_servers)
100
- else:
101
- self.logger.debug("⚪ Sin servidores MCP configurados")
102
214
 
103
215
  # Configure persistence layer
104
216
  self.checkpointer = MemorySaver() if use_checkpointer else None
105
- self.logger.debug(f"💾 Checkpointer: {'Habilitado' if use_checkpointer else 'Deshabilitado'}")
106
217
 
107
218
  # Prepare model with bound tools for native function calling
108
- self.logger.info("🤖 Preparando modelo con herramientas...")
109
219
  self.model_with_tools = self._prepare_model_with_tools()
110
220
 
111
221
  # Build modern instruction set with tool descriptions
112
- self.logger.info("📝 Construyendo instrucciones modernas...")
113
222
  self.instructions = self._build_modern_instructions()
114
- self.logger.debug(f"📋 Instrucciones finales: {len(self.instructions)} caracteres")
115
223
 
116
224
  # Create the LangGraph workflow
117
- self.logger.info("🔄 Creando workflow de LangGraph...")
118
225
  self.graph = self._create_modern_workflow()
119
226
 
120
227
  # Legacy compatibility attributes (maintained for API compatibility)
121
228
  self.conversation = None
122
229
  self.agent_executor = None
123
-
124
- self.logger.info("✅ LangChainBot inicializado correctamente")
125
- self.logger.info(f"📊 Resumen: {len(self.tools)} herramientas, {len(self.chat_history)} mensajes en historial")
126
- self.logger.info("="*80 + "\n")
127
230
 
128
231
  def _initialize_mcp(self, mcp_servers: Dict[str, Any]):
129
232
  """
@@ -146,81 +249,14 @@ class LangChainBot:
146
249
  MCP tools are automatically appended to the existing tools list and
147
250
  will be included in the model's tool binding process.
148
251
  """
149
- self.logger.info("="*80)
150
- self.logger.info("🌐 INICIALIZANDO MCP (Model Context Protocol)")
151
- self.logger.info("="*80)
152
-
153
252
  try:
154
- self.logger.info(f"📋 Servidores a inicializar: {len(mcp_servers)}")
155
-
156
- for server_name, server_config in mcp_servers.items():
157
- self.logger.info(f"\n🔌 Servidor: {server_name}")
158
- self.logger.debug(f" Command: {server_config.get('command')}")
159
- self.logger.debug(f" Args: {server_config.get('args')}")
160
- self.logger.debug(f" Transport: {server_config.get('transport')}")
161
-
162
- self.logger.info("\n🔄 Creando MultiServerMCPClient...")
163
253
  self.mcp_client = MultiServerMCPClient(mcp_servers)
164
- self.logger.info("✅ MultiServerMCPClient creado")
165
-
166
- # ===== FIX PARA APACHE/MOD_WSGI =====
167
- self.logger.info("🔧 Aplicando fix para compatibilidad Apache/mod_wsgi...")
168
-
169
- import subprocess
170
- original_create = asyncio.create_subprocess_exec
171
-
172
- async def fixed_create(*args, stdin=None, stdout=None, stderr=None, **kwargs):
173
- """Forzar PIPE para evitar heredar sys.stderr de Apache"""
174
- return await original_create(
175
- *args,
176
- stdin=stdin or subprocess.PIPE,
177
- stdout=stdout or subprocess.PIPE,
178
- stderr=stderr or subprocess.PIPE,
179
- **kwargs
180
- )
181
-
182
- # Aplicar parche temporalmente
183
- asyncio.create_subprocess_exec = fixed_create
184
- self.logger.debug("✅ Parche temporal aplicado a asyncio.create_subprocess_exec")
185
-
186
- try:
187
- self.logger.info("🔄 Obteniendo herramientas desde servidores MCP...")
188
- mcp_tools = asyncio.run(self.mcp_client.get_tools())
189
- self.logger.info(f"📥 Herramientas MCP recibidas: {len(mcp_tools)}")
190
- finally:
191
- # Restaurar original
192
- asyncio.create_subprocess_exec = original_create
193
- self.logger.debug("✅ Parche temporal removido, asyncio restaurado")
194
- # =====================================
195
-
196
- if mcp_tools:
197
- for i, tool in enumerate(mcp_tools, 1):
198
- tool_name = getattr(tool, 'name', 'Unknown')
199
- tool_desc = getattr(tool, 'description', 'Sin descripción')
200
- self.logger.debug(f" {i}. {tool_name}: {tool_desc[:100]}...")
201
-
254
+ mcp_tools = asyncio.run(self.mcp_client.get_tools())
202
255
  self.tools.extend(mcp_tools)
203
-
204
- self.logger.info(f"✅ MCP inicializado exitosamente")
205
- self.logger.info(f"📊 Total herramientas disponibles: {len(self.tools)}")
206
- self.logger.info(f" - Herramientas MCP: {len(mcp_tools)}")
207
- self.logger.info(f" - Herramientas previas: {len(self.tools) - len(mcp_tools)}")
208
- self.logger.info("="*80 + "\n")
209
-
210
256
  except Exception as e:
211
- self.logger.error("="*80)
212
- self.logger.error("❌ ERROR EN INICIALIZACIÓN MCP")
213
- self.logger.error("="*80)
214
- self.logger.error(f"Tipo de error: {type(e).__name__}")
215
- self.logger.error(f"Mensaje: {str(e)}")
257
+ self.logger.error(f"Error inicializando MCP: {e}")
216
258
  self.logger.exception("Traceback completo:")
217
- self.logger.error("="*80 + "\n")
218
-
219
259
  self.mcp_client = None
220
-
221
- # Mensaje de diagnóstico
222
- self.logger.warning("⚠️ Continuando sin MCP - solo herramientas locales disponibles")
223
- self.logger.warning(f" Herramientas disponibles: {len(self.tools)}")
224
260
 
225
261
  def _prepare_model_with_tools(self):
226
262
  """
@@ -233,31 +269,13 @@ class LangChainBot:
          The language model with tools bound, or the original model if no tools are available
          """
          if self.tools:
-             self.logger.info(f"🔗 Vinculando {len(self.tools)} herramientas al modelo")
-             try:
-                 bound_model = self.language_model.model.bind_tools(self.tools)
-                 self.logger.info("✅ Herramientas vinculadas correctamente")
-                 return bound_model
-             except Exception as e:
-                 self.logger.error(f"❌ Error vinculando herramientas: {e}")
-                 self.logger.exception("Traceback:")
-                 return self.language_model.model
-         else:
-             self.logger.debug("⚪ Sin herramientas para vincular, usando modelo base")
-             return self.language_model.model
+             return self.language_model.model.bind_tools(self.tools)
+         return self.language_model.model

      def _build_modern_instructions(self) -> str:
-         """
-         Build modern instructions with automatic tool documentation.
-
-         Returns:
-             str: Enhanced instructions with tool descriptions
-         """
          instructions = self.base_instructions

          if self.tools:
-             self.logger.info(f"📝 Generando documentación para {len(self.tools)} herramientas")
-
              tools_description = "\n\n# Available Tools\n\n"

              for tool in self.tools:
@@ -271,7 +289,7 @@ class LangChainBot:
                      required = "**REQUIRED**" if field_info.is_required() else "*optional*"
                      tools_description += f"- `{field_name}` ({field_info.annotation.__name__}, {required}): {field_info.description}\n"

-                 # Option 2: args_schema is a dict (MCP Tools)
+                 # Option 2: args_schema is a dict (MCP Tools) ← NEW
                  elif hasattr(tool, 'args_schema') and isinstance(tool.args_schema, dict):
                      if 'properties' in tool.args_schema:
                          tools_description += f"**Parameters:**\n"
@@ -301,7 +319,6 @@ class LangChainBot:
                                   "- Do NOT call tools with empty arguments\n")

              instructions += tools_description
-             self.logger.info(f"✅ Documentación de herramientas agregada ({len(tools_description)} caracteres)")

          return instructions

@@ -318,14 +335,24 @@ class LangChainBot:
          Returns:
              StateGraph: Compiled LangGraph workflow ready for execution
          """
-         self.logger.info("🔄 Construyendo workflow de LangGraph")

          def agent_node(state: ChatState) -> ChatState:
              """
              Main agent node responsible for generating responses and initiating tool calls.
-             """
-             self.logger.debug("🤖 Ejecutando agent_node")

+             This node:
+             1. Extracts the latest user message from the conversation state
+             2. Retrieves relevant context from processed files
+             3. Constructs a complete message history for the model
+             4. Invokes the model with tool binding for native function calling
+             5. Returns updated state with the model's response
+
+             Args:
+                 state (ChatState): Current conversation state
+
+             Returns:
+                 ChatState: Updated state with agent response
+             """
              # Extract the most recent user message
              last_user_message = None
              for msg in reversed(state["messages"]):
@@ -334,15 +361,10 @@ class LangChainBot:
                      break

              if not last_user_message:
-                 self.logger.warning("⚠️ No se encontró mensaje de usuario")
                  return state

-             self.logger.debug(f"💬 Mensaje usuario: {last_user_message[:100]}...")
-
              # Retrieve contextual information from processed files
              context = self._get_context(last_user_message)
-             if context:
-                 self.logger.debug(f"📚 Contexto recuperado: {len(context)} caracteres")

              # Build system prompt with optional context
              system_content = self.instructions
@@ -359,33 +381,24 @@ class LangChainBot:
                  elif isinstance(msg, AIMessage):
                      messages.append({"role": "assistant", "content": msg.content or ""})
                  elif isinstance(msg, ToolMessage):
+                     # Convert tool results to user messages for context
                      messages.append({"role": "user", "content": f"Tool result: {msg.content}"})

-             self.logger.debug(f"📨 Enviando {len(messages)} mensajes al modelo")
-
              try:
                  # Invoke model with native tool binding
                  response = self.model_with_tools.invoke(messages)

-                 self.logger.debug(f"✅ Respuesta recibida del modelo")
-
-                 # Check for tool calls
-                 if hasattr(response, 'tool_calls') and response.tool_calls:
-                     self.logger.info(f"🔧 Llamadas a herramientas detectadas: {len(response.tool_calls)}")
-                     for i, tc in enumerate(response.tool_calls, 1):
-                         tool_name = tc.get('name', 'Unknown')
-                         self.logger.debug(f"   {i}. {tool_name}")
-
                  # Return updated state
                  return {
                      **state,
                      "context": context,
-                     "messages": [response]
+                     "messages": [response]  # add_messages annotation handles proper appending
                  }

              except Exception as e:
-                 self.logger.error(f"Error en agent_node: {e}")
-                 self.logger.exception("Traceback:")
+                 self.logger.error(f"Error en agent_node: {e}")
+                 self.logger.exception("Traceback completo:")
+                 # Graceful fallback for error scenarios
                  fallback_response = AIMessage(content="I apologize, but I encountered an error processing your request.")
                  return {
                      **state,
@@ -396,16 +409,24 @@ class LangChainBot:
          def should_continue(state: ChatState) -> str:
              """
              Conditional edge function to determine workflow continuation.
+
+             Analyzes the last message to decide whether to execute tools or end the workflow.
+             This leverages LangGraph's native tool calling detection.
+
+             Args:
+                 state (ChatState): Current conversation state
+
+             Returns:
+                 str: Next node to execute ("tools" or "end")
              """
              last_message = state["messages"][-1]

+             # Check for pending tool calls using native tool calling detection
              if (isinstance(last_message, AIMessage) and
                      hasattr(last_message, 'tool_calls') and
                      last_message.tool_calls):
-                 self.logger.debug("➡️ Continuando a ejecución de herramientas")
                  return "tools"

-             self.logger.debug("🏁 Finalizando workflow")
              return "end"

          # Construct the workflow graph
@@ -413,18 +434,18 @@ class LangChainBot:

          # Add primary agent node
          workflow.add_node("agent", agent_node)
-         self.logger.debug("✅ Nodo 'agent' agregado")

          # Add tool execution node if tools are available
          if self.tools:
+             # ToolNode automatically handles tool execution and result formatting
              tool_node = ToolNode(self.tools)
              workflow.add_node("tools", tool_node)
-             self.logger.debug("✅ Nodo 'tools' agregado")

          # Define workflow edges and entry point
          workflow.set_entry_point("agent")

          if self.tools:
+             # Conditional routing based on tool call presence
              workflow.add_conditional_edges(
                  "agent",
                  should_continue,
@@ -433,21 +454,17 @@ class LangChainBot:
                      "end": END
                  }
              )
+             # Return to agent after tool execution for final response formatting
              workflow.add_edge("tools", "agent")
-             self.logger.debug("✅ Edges condicionales configurados")
          else:
+             # Direct termination if no tools are available
              workflow.add_edge("agent", END)
-             self.logger.debug("✅ Edge directo a END configurado")

          # Compile workflow with optional checkpointing
          if self.checkpointer:
-             compiled = workflow.compile(checkpointer=self.checkpointer)
-             self.logger.info("✅ Workflow compilado con checkpointer")
+             return workflow.compile(checkpointer=self.checkpointer)
          else:
-             compiled = workflow.compile()
-             self.logger.info("✅ Workflow compilado sin checkpointer")
-
-             return compiled
+             return workflow.compile()

      # ===== LEGACY API COMPATIBILITY =====

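When use_checkpointer=True, the compiled graph persists conversation state per thread via MemorySaver, and callers select a thread through the run config. A sketch of how such a graph is typically invoked (the thread_id value is hypothetical):

```python
# Each thread_id keeps its own conversation state in the MemorySaver
config = {"configurable": {"thread_id": "user-42"}}
result = bot.graph.invoke(
    {"messages": [HumanMessage(content="Hello")], "context": ""},
    config=config,
)
```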
@@ -457,6 +474,7 @@ class LangChainBot:

          This method provides the primary interface for single-turn conversations,
          maintaining backward compatibility with existing ChatService implementations.
+         Tool execution callbacks (if provided) will be triggered during execution.

          Args:
              user_input (str): The user's message or query
@@ -471,176 +489,202 @@ class LangChainBot:
          This method automatically handles tool execution and context integration
          from processed files while maintaining the original API signature.
          """
-         self.logger.info("="*80)
-         self.logger.info("📨 GET_RESPONSE llamado")
-         self.logger.debug(f"💬 Input: {user_input[:200]}...")
-
          # Prepare initial workflow state
          initial_state = {
              "messages": self.chat_history + [HumanMessage(content=user_input)],
              "context": ""
          }

-         self.logger.debug(f"📊 Estado inicial: {len(initial_state['messages'])} mensajes")
-
-         try:
-             # Execute the LangGraph workflow
-             self.logger.info("🔄 Ejecutando workflow...")
-             result = asyncio.run(self.graph.ainvoke(initial_state))
-             self.logger.info("✅ Workflow completado")
-
-             # Update internal conversation history
-             self.chat_history = result["messages"]
-             self.logger.debug(f"💾 Historial actualizado: {len(self.chat_history)} mensajes")
-
-             # Extract final response from the last assistant message
-             final_response = ""
-             total_input_tokens = 0
-             total_output_tokens = 0
-
-             for msg in reversed(result["messages"]):
-                 if isinstance(msg, AIMessage) and msg.content:
-                     final_response = msg.content
-                     break
-
-             # Extract token usage from response metadata
-             last_message = result["messages"][-1]
-             if hasattr(last_message, 'response_metadata'):
-                 token_usage = last_message.response_metadata.get('token_usage', {})
-                 total_input_tokens = token_usage.get('prompt_tokens', 0)
-                 total_output_tokens = token_usage.get('completion_tokens', 0)
-
-             self.logger.info(f"📊 Tokens: input={total_input_tokens}, output={total_output_tokens}")
-             self.logger.info(f"📝 Respuesta: {len(final_response)} caracteres")
-             self.logger.info("="*80 + "\n")
-
-             return ResponseModel(
-                 user_tokens=total_input_tokens,
-                 bot_tokens=total_output_tokens,
-                 response=final_response
-             )
-
-         except Exception as e:
-             self.logger.error("="*80)
-             self.logger.error("❌ ERROR EN GET_RESPONSE")
-             self.logger.error(f"Mensaje: {str(e)}")
-             self.logger.exception("Traceback:")
-             self.logger.error("="*80 + "\n")
-             raise
+         # Create callback handler if any callbacks are provided
+         config = {}
+         if self.on_tool_start or self.on_tool_end or self.on_tool_error:
+             tool_logger = _InternalToolLogger(
+                 on_start=self.on_tool_start,
+                 on_end=self.on_tool_end,
+                 on_error=self.on_tool_error
+             )
+             config["callbacks"] = [tool_logger]
+
+         # Execute the LangGraph workflow with callbacks
+         result = asyncio.run(self.graph.ainvoke(initial_state, config=config))
+
+         # Update internal conversation history
+         self.chat_history = result["messages"]
+
+         # Extract final response from the last assistant message
+         final_response = ""
+         total_input_tokens = 0
+         total_output_tokens = 0
+
+         for msg in reversed(result["messages"]):
+             if isinstance(msg, AIMessage) and msg.content:
+                 final_response = msg.content
+                 break
+
+         # Extract token usage from response metadata
+         last_message = result["messages"][-1]
+         if hasattr(last_message, 'response_metadata'):
+             token_usage = last_message.response_metadata.get('token_usage', {})
+             total_input_tokens = token_usage.get('prompt_tokens', 0)
+             total_output_tokens = token_usage.get('completion_tokens', 0)
+
+         return ResponseModel(
+             user_tokens=total_input_tokens,
+             bot_tokens=total_output_tokens,
+             response=final_response
+         )

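Taken together, the rewritten get_response threads the user callbacks into the workflow run instead of emitting logger noise. A hedged usage sketch — the model, embeddings, and my_tool objects are assumed to be built through the package's own interfaces and are not defined here:

```python
def log_tool_start(tool_name: str, input_data: str):
    print(f"-> {tool_name}({input_data})")

bot = LangChainBot(
    language_model=model,          # any ILanguageModel implementation
    embeddings=embeddings,
    instructions="You are a helpful assistant",
    tools=[my_tool],               # hypothetical BaseTool instance
    on_tool_start=log_tool_start,
)

response = bot.get_response("What is the weather in Madrid?")
print(response.response)                          # final text
print(response.user_tokens, response.bot_tokens)  # token accounting
```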
 
531
537
  def get_response_stream(self, user_input: str) -> Generator[str, None, None]:
532
538
  """
533
539
  Generate a streaming response for real-time user interaction.
534
- """
535
- self.logger.info("📨 GET_RESPONSE_STREAM llamado")
536
- self.logger.debug(f"💬 Input: {user_input[:200]}...")
537
540
 
541
+ This method provides streaming capabilities while maintaining backward
542
+ compatibility with the original API. Tool execution callbacks (if provided)
543
+ will be triggered during execution.
544
+
545
+ Args:
546
+ user_input (str): The user's message or query
547
+
548
+ Yields:
549
+ str: Response chunks as they are generated
550
+
551
+ Note:
552
+ Current implementation streams complete responses. For token-level
553
+ streaming, consider using the model's native streaming capabilities.
554
+ """
538
555
  initial_state = {
539
556
  "messages": self.chat_history + [HumanMessage(content=user_input)],
540
557
  "context": ""
541
558
  }
542
559
 
560
+ # Create callback handler if any callbacks are provided
561
+ config = {}
562
+ if self.on_tool_start or self.on_tool_end or self.on_tool_error:
563
+ tool_logger = _InternalToolLogger(
564
+ on_start=self.on_tool_start,
565
+ on_end=self.on_tool_end,
566
+ on_error=self.on_tool_error
567
+ )
568
+ config["callbacks"] = [tool_logger]
569
+
543
570
  accumulated_response = ""
544
571
 
545
- try:
546
- for chunk in self.graph.stream(initial_state):
547
- if "agent" in chunk:
548
- for message in chunk["agent"]["messages"]:
549
- if isinstance(message, AIMessage) and message.content:
550
- accumulated_response = message.content
551
- yield message.content
552
-
553
- if accumulated_response:
554
- self.chat_history.extend([
555
- HumanMessage(content=user_input),
556
- AIMessage(content=accumulated_response)
557
- ])
558
-
559
- self.logger.info(f"✅ Stream completado: {len(accumulated_response)} caracteres")
560
-
561
- except Exception as e:
562
- self.logger.error(f"❌ Error en stream: {e}")
563
- self.logger.exception("Traceback:")
564
- raise
572
+ # Stream workflow execution with callbacks
573
+ for chunk in self.graph.stream(initial_state, config=config):
574
+ # Extract content from workflow chunks
575
+ if "agent" in chunk:
576
+ for message in chunk["agent"]["messages"]:
577
+ if isinstance(message, AIMessage) and message.content:
578
+ # Stream complete responses (can be enhanced for token-level streaming)
579
+ accumulated_response = message.content
580
+ yield message.content
581
+
582
+ # Update conversation history after streaming completion
583
+ if accumulated_response:
584
+ self.chat_history.extend([
585
+ HumanMessage(content=user_input),
586
+ AIMessage(content=accumulated_response)
587
+ ])
565
588
 
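Consuming the generator is unchanged from earlier releases; only the callback plumbing is new. For example:

```python
# Chunks arrive as complete agent responses, per the Note above
for chunk in bot.get_response_stream("Summarize the uploaded file"):
    print(chunk, end="", flush=True)
```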
      def load_conversation_history(self, messages: List[Message]):
          """
          Load conversation history from Django model instances.
+
+         This method maintains compatibility with existing Django-based conversation
+         storage while preparing the history for modern LangGraph processing.
+
+         Args:
+             messages (List[Message]): List of Django Message model instances
+                 Expected to have 'content' and 'is_bot' attributes
          """
-         self.logger.info(f"📥 Cargando historial: {len(messages)} mensajes")
          self.chat_history.clear()
          for message in messages:
              if message.is_bot:
                  self.chat_history.append(AIMessage(content=message.content))
              else:
                  self.chat_history.append(HumanMessage(content=message.content))
-         self.logger.debug("✅ Historial cargado")

      def save_messages(self, user_message: str, bot_response: str):
          """
          Save messages to internal conversation history.
+
+         This method provides backward compatibility for manual history management.
+
+         Args:
+             user_message (str): The user's input message
+             bot_response (str): The bot's generated response
          """
-         self.logger.debug("💾 Guardando mensajes en historial interno")
          self.chat_history.append(HumanMessage(content=user_message))
          self.chat_history.append(AIMessage(content=bot_response))

      def process_file(self, file: FileProcessorInterface):
          """
          Process and index a file for contextual retrieval.
-         """
-         self.logger.info("📄 Procesando archivo para indexación")
-         try:
-             document = file.getText()
-             text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-             texts = text_splitter.split_documents(document)
+
+         This method maintains compatibility with existing file processing workflows
+         while leveraging FAISS for efficient similarity search.
+
+         Args:
+             file (FileProcessorInterface): File processor instance that implements getText()

-             self.logger.debug(f"✂️ Documento dividido en {len(texts)} chunks")
+         Note:
+             Processed files are automatically available for context retrieval
+             in subsequent conversations without additional configuration.
+         """
+         document = file.getText()
+         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+         texts = text_splitter.split_documents(document)

-             if self.vector_store is None:
-                 self.vector_store = FAISS.from_texts(
-                     [doc.page_content for doc in texts],
-                     self.embeddings
-                 )
-                 self.logger.info("✅ Vector store creado")
-             else:
-                 self.vector_store.add_texts([doc.page_content for doc in texts])
-                 self.logger.info("✅ Textos agregados a vector store existente")
-
-         except Exception as e:
-             self.logger.error(f"❌ Error procesando archivo: {e}")
-             self.logger.exception("Traceback:")
-             raise
+         if self.vector_store is None:
+             self.vector_store = FAISS.from_texts(
+                 [doc.page_content for doc in texts],
+                 self.embeddings
+             )
+         else:
+             self.vector_store.add_texts([doc.page_content for doc in texts])

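A short sketch of feeding a file into the index — TextFileProcessor is a hypothetical FileProcessorInterface implementation written for illustration, since the package's concrete processors live in other modules:

```python
class TextFileProcessor:  # hypothetical implementation of FileProcessorInterface
    def __init__(self, path: str):
        self.path = path

    def getText(self):
        from langchain.schema import Document
        with open(self.path, encoding="utf-8") as f:
            return [Document(page_content=f.read())]

bot.process_file(TextFileProcessor("notes.txt"))
# Subsequent get_response calls can now draw context from notes.txt
```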
      def clear_memory(self):
          """
          Clear conversation history and processed file context.
+
+         This method resets the bot to a clean state, removing all conversation
+         history and processed file context.
          """
-         self.logger.info("🗑️ Limpiando memoria")
          self.chat_history.clear()
          self.vector_store = None
-         self.logger.debug("✅ Memoria limpiada")

      def get_chat_history(self) -> List[BaseMessage]:
          """
          Retrieve a copy of the current conversation history.
+
+         Returns:
+             List[BaseMessage]: Copy of the conversation history
          """
          return self.chat_history.copy()

      def set_chat_history(self, history: List[BaseMessage]):
          """
          Set the conversation history from a list of BaseMessage instances.
+
+         Args:
+             history (List[BaseMessage]): New conversation history to set
          """
-         self.logger.info(f"📝 Estableciendo historial: {len(history)} mensajes")
          self.chat_history = history.copy()

      def _get_context(self, query: str) -> str:
          """
          Retrieve relevant context from processed files using similarity search.
+
+         This method performs semantic search over processed file content to find
+         the most relevant information for the current query.
+
+         Args:
+             query (str): The query to search for relevant context
+
+         Returns:
+             str: Concatenated relevant context from processed files
          """
          if self.vector_store:
-             self.logger.debug(f"🔍 Buscando contexto para query: {query[:100]}...")
              docs = self.vector_store.similarity_search(query, k=4)
-             context = "\n".join([doc.page_content for doc in docs])
-             self.logger.debug(f"✅ Contexto encontrado: {len(context)} caracteres")
-             return context
+             return "\n".join([doc.page_content for doc in docs])
          return ""
@@ -2,16 +2,30 @@ from pydantic import BaseModel
  from typing import Dict, Any, Type
  from sonika_langchain_bot.langchain_class import ILanguageModel

- # Class that performs text classification
+ class ClassificationResponse(BaseModel):
+     """Classification response including the tokens used"""
+     input_tokens: int
+     output_tokens: int
+     result: Dict[str, Any]
+
  class TextClassifier:
      def __init__(self, validation_class: Type[BaseModel], llm: ILanguageModel):
-         self.llm =llm
+         self.llm = llm
          self.validation_class = validation_class
-         # Configure the model to produce structured output
-         self.llm.model = self.llm.model.with_structured_output(validation_class)
+         # Keep both versions of the model
+         self.original_model = self.llm.model  # Without structured output
+         self.structured_model = self.llm.model.with_structured_output(validation_class)

-     def classify(self, text: str) -> Dict[str, Any]:
-         # Build the prompt template
+     def classify(self, text: str) -> ClassificationResponse:
+         """
+         Classify the text according to the validation class.
+
+         Args:
+             text: Text to classify
+
+         Returns:
+             ClassificationResponse: Object with result, input_tokens and output_tokens
+         """
          prompt = f"""
          Classify the following text based on the properties defined in the validation class.

@@ -19,12 +33,34 @@ class TextClassifier:

          Only extract the properties mentioned in the validation class.
          """
-         response = self.llm.invoke(prompt=prompt)

-         # Make sure the response is of the provided validation class
+         # First invoke the ORIGINAL model to obtain token metadata
+         raw_response = self.original_model.invoke(prompt)
+
+         # Extract token information from the original AIMessage
+         input_tokens = 0
+         output_tokens = 0
+
+         if hasattr(raw_response, 'response_metadata'):
+             token_usage = raw_response.response_metadata.get('token_usage', {})
+             input_tokens = token_usage.get('prompt_tokens', 0)
+             output_tokens = token_usage.get('completion_tokens', 0)
+
+         # Now invoke with structured output to obtain the parsed object
+         response = self.structured_model.invoke(prompt)
+
+         # Validate that the response is of the correct class
          if isinstance(response, self.validation_class):
-             # Build the result dynamically from the validation class attributes
-             result = {field: getattr(response, field) for field in self.validation_class.__fields__.keys()}
-             return result
+             # Build the result dynamically from the attributes
+             result_data = {
+                 field: getattr(response, field)
+                 for field in self.validation_class.__fields__.keys()
+             }
+
+             return ClassificationResponse(
+                 input_tokens=input_tokens,
+                 output_tokens=output_tokens,
+                 result=result_data
+             )
          else:
-             raise ValueError(f"The response is not of type '{self.validation_class.__name__}'")
+             raise ValueError(f"The response is not of type '{self.validation_class.__name__}'")
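Note that classify now invokes the model twice — once without structured output to read token_usage from response_metadata, then again with structured output for the parsed object — so each classification costs roughly double the tokens and latency of the 0.0.17 implementation. A hedged usage sketch (SentimentLabels is a hypothetical validation class and llm an assumed ILanguageModel instance):

```python
from pydantic import BaseModel

class SentimentLabels(BaseModel):  # hypothetical validation class
    sentiment: str
    confidence: float

classifier = TextClassifier(validation_class=SentimentLabels, llm=llm)
outcome = classifier.classify("The product arrived late and broken.")
print(outcome.result)          # e.g. {'sentiment': 'negative', 'confidence': ...}
print(outcome.input_tokens, outcome.output_tokens)
```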
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sonika-langchain-bot
- Version: 0.0.17
+ Version: 0.0.20
  Summary: Agente langchain con LLM
  Author: Erley Blanco Carvajal
  License: MIT License
@@ -1,15 +1,15 @@
  sonika_langchain_bot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sonika_langchain_bot/document_processor.py,sha256=RuHT22Zt-psoe4adFWKwBJ0gi638fq8r2S5WZoDK8fY,10979
  sonika_langchain_bot/langchain_bdi.py,sha256=ithc55azP5XSPb8AGRUrDGYnVI6I4IqpqElLNat4BAQ,7024
- sonika_langchain_bot/langchain_bot_agent.py,sha256=UXcLkyVisyrDXfikKVYj-8l3FvGS8WJUT_G6XpPZO_w,28955
+ sonika_langchain_bot/langchain_bot_agent.py,sha256=l1Kj4iDnGSH-1NZkFxdlVCKOQxoDMsPjWNdxS3GapcA,29214
  sonika_langchain_bot/langchain_bot_agent_bdi.py,sha256=Ev0hhRQYe6kyGAHiFDhFsfu6QnTwUFaA9oB8DfNV7u4,8613
- sonika_langchain_bot/langchain_clasificator.py,sha256=GR85ZAliymBSoDa5PXB31BvJkuiokGjS2v3RLdXnzzk,1381
+ sonika_langchain_bot/langchain_clasificator.py,sha256=h0-H_1bqgA04rF2ZHh5zOg2PinqTuLQMcSK7AGK4uw8,2583
  sonika_langchain_bot/langchain_class.py,sha256=5anB6v_wCzEoAJRb8fV9lPPS72E7-k51y_aeiip8RAw,1114
  sonika_langchain_bot/langchain_files.py,sha256=SEyqnJgBc_nbCIG31eypunBbO33T5AHFOhQZcghTks4,381
  sonika_langchain_bot/langchain_models.py,sha256=vqSSZ48tNofrTMLv1QugDdyey2MuIeSdlLSD37AnzkI,2235
  sonika_langchain_bot/langchain_tools.py,sha256=y7wLf1DbUua3QIvz938Ek-JIMOuQhrOIptJadW8OIsU,466
- sonika_langchain_bot-0.0.17.dist-info/licenses/LICENSE,sha256=O8VZ4aU_rUMAArvYTm2bshcZ991huv_tpfB5BKHH9Q8,1064
- sonika_langchain_bot-0.0.17.dist-info/METADATA,sha256=q7AL7tRyc9_WhSL4bI_h0QWre1YV4qL3sQTI6v2ovd4,6508
- sonika_langchain_bot-0.0.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- sonika_langchain_bot-0.0.17.dist-info/top_level.txt,sha256=UsTTSZFEw2wrPSVh4ufu01e2m_E7O_QVYT_k4zCQaAE,21
- sonika_langchain_bot-0.0.17.dist-info/RECORD,,
+ sonika_langchain_bot-0.0.20.dist-info/licenses/LICENSE,sha256=O8VZ4aU_rUMAArvYTm2bshcZ991huv_tpfB5BKHH9Q8,1064
+ sonika_langchain_bot-0.0.20.dist-info/METADATA,sha256=bIPx5NtqGhSIRI1nPP-PZInj8MUhHRheq7VRwQNqOXY,6508
+ sonika_langchain_bot-0.0.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ sonika_langchain_bot-0.0.20.dist-info/top_level.txt,sha256=UsTTSZFEw2wrPSVh4ufu01e2m_E7O_QVYT_k4zCQaAE,21
+ sonika_langchain_bot-0.0.20.dist-info/RECORD,,