sonika-langchain-bot 0.0.15__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sonika-langchain-bot might be problematic.
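The headline change in 0.0.16 is dependency-injected logging: LangChainBot gains an optional logger parameter and routes all of its new diagnostic output through it, staying silent (via a NullHandler) when no logger is supplied. A minimal usage sketch, assuming the import path sonika_langchain_bot.langchain_bot_agent from the RECORD listing below; the model and embeddings objects are placeholders for whatever the package's langchain_models module provides:

    import logging

    from sonika_langchain_bot.langchain_bot_agent import LangChainBot

    # Caller-owned logger; the bot writes its init/runtime trace through it
    logger = logging.getLogger("sonika.bot")
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.StreamHandler())

    bot = LangChainBot(
        language_model=my_language_model,  # placeholder model wrapper exposing .model
        embeddings=my_embeddings,          # placeholder embeddings instance
        instructions="You are a helpful assistant.",
        logger=logger,                     # new in 0.0.16; omit for silent operation
    )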

@@ -1,5 +1,6 @@
  from typing import Generator, List, Optional, Dict, Any, TypedDict, Annotated
  import asyncio
+ import logging
  from langchain.schema import AIMessage, HumanMessage, BaseMessage
  from langchain_core.messages import ToolMessage
  from langchain.text_splitter import CharacterTextSplitter
@@ -39,6 +40,7 @@ class LangChainBot:
      - Thread-based conversation persistence
      - Streaming responses
      - Backward compatibility with legacy APIs
+     - Debug logging injection for production troubleshooting
      """

      def __init__(self,
@@ -47,7 +49,8 @@ class LangChainBot:
                   instructions: str,
                   tools: Optional[List[BaseTool]] = None,
                   mcp_servers: Optional[Dict[str, Any]] = None,
-                  use_checkpointer: bool = False):
+                  use_checkpointer: bool = False,
+                  logger: Optional[logging.Logger] = None):
          """
          Initialize the modern LangGraph bot with optional MCP support.

@@ -58,16 +61,28 @@
              tools (List[BaseTool], optional): Traditional LangChain tools to bind to the model
              mcp_servers (Dict[str, Any], optional): MCP server configurations for dynamic tool loading
              use_checkpointer (bool): Enable automatic conversation persistence using LangGraph checkpoints
+             logger (logging.Logger, optional): Logger instance for debugging. If None, a silent NullHandler is used

          Note:
              The instructions will be automatically enhanced with tool descriptions
              when tools are provided, eliminating the need for manual tool instruction formatting.
          """
+         # Configure logger (silent by default if not provided)
+         self.logger = logger or logging.getLogger(__name__)
+         if logger is None:
+             self.logger.addHandler(logging.NullHandler())
+
+         self.logger.info("="*80)
+         self.logger.info("🚀 Initializing LangChainBot")
+         self.logger.info("="*80)
+
          # Core components
          self.language_model = language_model
          self.embeddings = embeddings
          self.base_instructions = instructions

+         self.logger.debug(f"📋 Base instructions: {len(instructions)} characters")
+
          # Backward compatibility attributes
          self.chat_history: List[BaseMessage] = []
          self.vector_store = None
@@ -76,25 +91,39 @@
          self.tools = tools or []
          self.mcp_client = None

+         self.logger.info(f"🔧 Initial tools: {len(self.tools)}")
+
          # Initialize MCP servers if provided
          if mcp_servers:
+             self.logger.info(f"🌐 MCP servers detected: {len(mcp_servers)}")
              self._initialize_mcp(mcp_servers)
+         else:
+             self.logger.debug("⚪ No MCP servers configured")

          # Configure persistence layer
          self.checkpointer = MemorySaver() if use_checkpointer else None
+         self.logger.debug(f"💾 Checkpointer: {'Enabled' if use_checkpointer else 'Disabled'}")

          # Prepare model with bound tools for native function calling
+         self.logger.info("🤖 Preparing model with tools...")
          self.model_with_tools = self._prepare_model_with_tools()

          # Build modern instruction set with tool descriptions
+         self.logger.info("📝 Building modern instructions...")
          self.instructions = self._build_modern_instructions()
+         self.logger.debug(f"📋 Final instructions: {len(self.instructions)} characters")

          # Create the LangGraph workflow
+         self.logger.info("🔄 Creating LangGraph workflow...")
          self.graph = self._create_modern_workflow()

          # Legacy compatibility attributes (maintained for API compatibility)
          self.conversation = None
          self.agent_executor = None
+
+         self.logger.info("✅ LangChainBot initialized successfully")
+         self.logger.info(f"📊 Summary: {len(self.tools)} tools, {len(self.chat_history)} messages in history")
+         self.logger.info("="*80 + "\n")

      def _initialize_mcp(self, mcp_servers: Dict[str, Any]):
          """
@@ -117,14 +146,56 @@
          MCP tools are automatically appended to the existing tools list and
          will be included in the model's tool binding process.
          """
+         self.logger.info("="*80)
+         self.logger.info("🌐 INITIALIZING MCP (Model Context Protocol)")
+         self.logger.info("="*80)
+
          try:
+             self.logger.info(f"📋 Servers to initialize: {len(mcp_servers)}")
+
+             for server_name, server_config in mcp_servers.items():
+                 self.logger.info(f"\n🔌 Server: {server_name}")
+                 self.logger.debug(f"   Command: {server_config.get('command')}")
+                 self.logger.debug(f"   Args: {server_config.get('args')}")
+                 self.logger.debug(f"   Transport: {server_config.get('transport')}")
+
+             self.logger.info("\n🔄 Creating MultiServerMCPClient...")
              self.mcp_client = MultiServerMCPClient(mcp_servers)
+             self.logger.info("✅ MultiServerMCPClient created")
+
+             self.logger.info("🔄 Fetching tools from MCP servers...")
              mcp_tools = asyncio.run(self.mcp_client.get_tools())
+
+             self.logger.info(f"📥 MCP tools received: {len(mcp_tools)}")
+
+             if mcp_tools:
+                 for i, tool in enumerate(mcp_tools, 1):
+                     tool_name = getattr(tool, 'name', 'Unknown')
+                     tool_desc = getattr(tool, 'description', 'No description')
+                     self.logger.debug(f"   {i}. {tool_name}: {tool_desc[:100]}...")
+
              self.tools.extend(mcp_tools)
-             print(f"✅ MCP initialized: {len(mcp_tools)} tools from {len(mcp_servers)} servers")
+
+             self.logger.info("✅ MCP initialized successfully")
+             self.logger.info(f"📊 Total tools available: {len(self.tools)}")
+             self.logger.info(f"   - MCP tools: {len(mcp_tools)}")
+             self.logger.info(f"   - Pre-existing tools: {len(self.tools) - len(mcp_tools)}")
+             self.logger.info("="*80 + "\n")
+
          except Exception as e:
-             print(f"⚠️ MCP initialization error: {e}")
+             self.logger.error("="*80)
+             self.logger.error("❌ ERROR DURING MCP INITIALIZATION")
+             self.logger.error("="*80)
+             self.logger.error(f"Error type: {type(e).__name__}")
+             self.logger.error(f"Message: {str(e)}")
+             self.logger.exception("Full traceback:")
+             self.logger.error("="*80 + "\n")
+
              self.mcp_client = None
+
+             # Diagnostic message
+             self.logger.warning("⚠️ Continuing without MCP - only local tools available")
+             self.logger.warning(f"   Tools available: {len(self.tools)}")

      def _prepare_model_with_tools(self):
          """
@@ -137,13 +208,31 @@
              The language model with tools bound, or the original model if no tools are available
          """
          if self.tools:
-             return self.language_model.model.bind_tools(self.tools)
-         return self.language_model.model
+             self.logger.info(f"🔗 Binding {len(self.tools)} tools to the model")
+             try:
+                 bound_model = self.language_model.model.bind_tools(self.tools)
+                 self.logger.info("✅ Tools bound successfully")
+                 return bound_model
+             except Exception as e:
+                 self.logger.error(f"❌ Error binding tools: {e}")
+                 self.logger.exception("Traceback:")
+                 return self.language_model.model
+         else:
+             self.logger.debug("⚪ No tools to bind, using base model")
+             return self.language_model.model

      def _build_modern_instructions(self) -> str:
+         """
+         Build modern instructions with automatic tool documentation.
+
+         Returns:
+             str: Enhanced instructions with tool descriptions
+         """
          instructions = self.base_instructions

          if self.tools:
+             self.logger.info(f"📝 Generating documentation for {len(self.tools)} tools")
+
              tools_description = "\n\n# Available Tools\n\n"

              for tool in self.tools:
@@ -157,7 +246,7 @@
                          required = "**REQUIRED**" if field_info.is_required() else "*optional*"
                          tools_description += f"- `{field_name}` ({field_info.annotation.__name__}, {required}): {field_info.description}\n"

-                 # Option 2: args_schema is a dict (MCP Tools) ← NEW
+                 # Option 2: args_schema is a dict (MCP Tools)
                  elif hasattr(tool, 'args_schema') and isinstance(tool.args_schema, dict):
                      if 'properties' in tool.args_schema:
                          tools_description += f"**Parameters:**\n"
@@ -187,6 +276,7 @@
                  "- Do NOT call tools with empty arguments\n")

              instructions += tools_description
+             self.logger.info(f"✅ Tool documentation added ({len(tools_description)} characters)")

          return instructions

@@ -203,24 +293,14 @@
          Returns:
              StateGraph: Compiled LangGraph workflow ready for execution
          """
+         self.logger.info("🔄 Building LangGraph workflow")

          def agent_node(state: ChatState) -> ChatState:
              """
              Main agent node responsible for generating responses and initiating tool calls.
-
-             This node:
-             1. Extracts the latest user message from the conversation state
-             2. Retrieves relevant context from processed files
-             3. Constructs a complete message history for the model
-             4. Invokes the model with tool binding for native function calling
-             5. Returns updated state with the model's response
-
-             Args:
-                 state (ChatState): Current conversation state
-
-             Returns:
-                 ChatState: Updated state with agent response
              """
+             self.logger.debug("🤖 Executing agent_node")
+
              # Extract the most recent user message
              last_user_message = None
              for msg in reversed(state["messages"]):
@@ -229,10 +309,15 @@
                      break

              if not last_user_message:
+                 self.logger.warning("⚠️ No user message found")
                  return state

+             self.logger.debug(f"💬 User message: {last_user_message[:100]}...")
+
              # Retrieve contextual information from processed files
              context = self._get_context(last_user_message)
+             if context:
+                 self.logger.debug(f"📚 Context retrieved: {len(context)} characters")

              # Build system prompt with optional context
              system_content = self.instructions
@@ -249,23 +334,33 @@
                  elif isinstance(msg, AIMessage):
                      messages.append({"role": "assistant", "content": msg.content or ""})
                  elif isinstance(msg, ToolMessage):
-                     # Convert tool results to user messages for context
                      messages.append({"role": "user", "content": f"Tool result: {msg.content}"})

+             self.logger.debug(f"📨 Sending {len(messages)} messages to the model")
+
              try:
                  # Invoke model with native tool binding
                  response = self.model_with_tools.invoke(messages)

+                 self.logger.debug("✅ Response received from the model")
+
+                 # Check for tool calls
+                 if hasattr(response, 'tool_calls') and response.tool_calls:
+                     self.logger.info(f"🔧 Tool calls detected: {len(response.tool_calls)}")
+                     for i, tc in enumerate(response.tool_calls, 1):
+                         tool_name = tc.get('name', 'Unknown')
+                         self.logger.debug(f"   {i}. {tool_name}")
+
                  # Return updated state
                  return {
                      **state,
                      "context": context,
-                     "messages": [response]  # add_messages annotation handles proper appending
+                     "messages": [response]
                  }

              except Exception as e:
-                 print(f"Error in agent_node: {e}")
-                 # Graceful fallback for error scenarios
+                 self.logger.error(f"Error in agent_node: {e}")
+                 self.logger.exception("Traceback:")
                  fallback_response = AIMessage(content="I apologize, but I encountered an error processing your request.")
                  return {
                      **state,
@@ -276,24 +371,16 @@
          def should_continue(state: ChatState) -> str:
              """
              Conditional edge function to determine workflow continuation.
-
-             Analyzes the last message to decide whether to execute tools or end the workflow.
-             This leverages LangGraph's native tool calling detection.
-
-             Args:
-                 state (ChatState): Current conversation state
-
-             Returns:
-                 str: Next node to execute ("tools" or "end")
              """
              last_message = state["messages"][-1]

-             # Check for pending tool calls using native tool calling detection
              if (isinstance(last_message, AIMessage) and
                  hasattr(last_message, 'tool_calls') and
                  last_message.tool_calls):
+                 self.logger.debug("➡️ Continuing to tool execution")
                  return "tools"

+             self.logger.debug("🏁 Ending workflow")
              return "end"

          # Construct the workflow graph
@@ -301,18 +388,18 @@

          # Add primary agent node
          workflow.add_node("agent", agent_node)
+         self.logger.debug("✅ Node 'agent' added")

          # Add tool execution node if tools are available
          if self.tools:
-             # ToolNode automatically handles tool execution and result formatting
              tool_node = ToolNode(self.tools)
              workflow.add_node("tools", tool_node)
+             self.logger.debug("✅ Node 'tools' added")

          # Define workflow edges and entry point
          workflow.set_entry_point("agent")

          if self.tools:
-             # Conditional routing based on tool call presence
              workflow.add_conditional_edges(
                  "agent",
                  should_continue,
@@ -321,17 +408,21 @@
                      "end": END
                  }
              )
-             # Return to agent after tool execution for final response formatting
              workflow.add_edge("tools", "agent")
+             self.logger.debug("✅ Conditional edges configured")
          else:
-             # Direct termination if no tools are available
              workflow.add_edge("agent", END)
+             self.logger.debug("✅ Direct edge to END configured")

          # Compile workflow with optional checkpointing
          if self.checkpointer:
-             return workflow.compile(checkpointer=self.checkpointer)
+             compiled = workflow.compile(checkpointer=self.checkpointer)
+             self.logger.info("✅ Workflow compiled with checkpointer")
          else:
-             return workflow.compile()
+             compiled = workflow.compile()
+             self.logger.info("✅ Workflow compiled without checkpointer")
+
+         return compiled

      # ===== LEGACY API COMPATIBILITY =====

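A note on the compile step above: with use_checkpointer=True the graph is compiled against LangGraph's in-memory MemorySaver, which scopes saved state to the thread_id in the run config. get_response below invokes the graph without any config, so a caller who wants thread-scoped persistence would drive the compiled graph directly; a hedged sketch of the standard LangGraph pattern, reusing the bot.graph attribute and state shape from this file:

    import asyncio

    from langchain.schema import HumanMessage

    # bot = LangChainBot(..., use_checkpointer=True)  # constructor arguments elided
    config = {"configurable": {"thread_id": "user-42"}}  # hypothetical thread key

    state = {"messages": [HumanMessage(content="Hello")], "context": ""}
    result = asyncio.run(bot.graph.ainvoke(state, config=config))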
@@ -355,61 +446,70 @@
          This method automatically handles tool execution and context integration
          from processed files while maintaining the original API signature.
          """
+         self.logger.info("="*80)
+         self.logger.info("📨 GET_RESPONSE called")
+         self.logger.debug(f"💬 Input: {user_input[:200]}...")
+
          # Prepare initial workflow state
          initial_state = {
              "messages": self.chat_history + [HumanMessage(content=user_input)],
              "context": ""
          }

-         # Execute the LangGraph workflow
-         #result = self.graph.invoke(initial_state)
-
-         # Always use ainvoke (works for both cases)
-         result = asyncio.run(self.graph.ainvoke(initial_state))
-
-         # Update internal conversation history
-         self.chat_history = result["messages"]
-
-         # Extract final response from the last assistant message
-         final_response = ""
-         total_input_tokens = 0
-         total_output_tokens = 0
-
-         for msg in reversed(result["messages"]):
-             if isinstance(msg, AIMessage) and msg.content:
-                 final_response = msg.content
-                 break
-
-         # Extract token usage from response metadata
-         last_message = result["messages"][-1]
-         if hasattr(last_message, 'response_metadata'):
-             token_usage = last_message.response_metadata.get('token_usage', {})
-             total_input_tokens = token_usage.get('prompt_tokens', 0)
-             total_output_tokens = token_usage.get('completion_tokens', 0)
-
-         return ResponseModel(
-             user_tokens=total_input_tokens,
-             bot_tokens=total_output_tokens,
-             response=final_response
-         )
+         self.logger.debug(f"📊 Initial state: {len(initial_state['messages'])} messages")
+
+         try:
+             # Execute the LangGraph workflow
+             self.logger.info("🔄 Executing workflow...")
+             result = asyncio.run(self.graph.ainvoke(initial_state))
+             self.logger.info("✅ Workflow completed")
+
+             # Update internal conversation history
+             self.chat_history = result["messages"]
+             self.logger.debug(f"💾 History updated: {len(self.chat_history)} messages")
+
+             # Extract final response from the last assistant message
+             final_response = ""
+             total_input_tokens = 0
+             total_output_tokens = 0
+
+             for msg in reversed(result["messages"]):
+                 if isinstance(msg, AIMessage) and msg.content:
+                     final_response = msg.content
+                     break
+
+             # Extract token usage from response metadata
+             last_message = result["messages"][-1]
+             if hasattr(last_message, 'response_metadata'):
+                 token_usage = last_message.response_metadata.get('token_usage', {})
+                 total_input_tokens = token_usage.get('prompt_tokens', 0)
+                 total_output_tokens = token_usage.get('completion_tokens', 0)
+
+             self.logger.info(f"📊 Tokens: input={total_input_tokens}, output={total_output_tokens}")
+             self.logger.info(f"📝 Response: {len(final_response)} characters")
+             self.logger.info("="*80 + "\n")
+
+             return ResponseModel(
+                 user_tokens=total_input_tokens,
+                 bot_tokens=total_output_tokens,
+                 response=final_response
+             )
+
+         except Exception as e:
+             self.logger.error("="*80)
+             self.logger.error("❌ ERROR IN GET_RESPONSE")
+             self.logger.error(f"Message: {str(e)}")
+             self.logger.exception("Traceback:")
+             self.logger.error("="*80 + "\n")
+             raise

      def get_response_stream(self, user_input: str) -> Generator[str, None, None]:
          """
          Generate a streaming response for real-time user interaction.
-
-         This method provides streaming capabilities while maintaining backward
-         compatibility with the original API.
-
-         Args:
-             user_input (str): The user's message or query
-
-         Yields:
-             str: Response chunks as they are generated
-
-         Note:
-             Current implementation streams complete responses. For token-level
-             streaming, consider using the model's native streaming capabilities.
          """
+         self.logger.info("📨 GET_RESPONSE_STREAM called")
+         self.logger.debug(f"💬 Input: {user_input[:200]}...")
+
          initial_state = {
              "messages": self.chat_history + [HumanMessage(content=user_input)],
              "context": ""
@@ -417,156 +517,105 @@

          accumulated_response = ""

-         # Stream workflow execution
-         for chunk in self.graph.stream(initial_state):
-             # Extract content from workflow chunks
-             if "agent" in chunk:
-                 for message in chunk["agent"]["messages"]:
-                     if isinstance(message, AIMessage) and message.content:
-                         # Stream complete responses (can be enhanced for token-level streaming)
-                         accumulated_response = message.content
-                         yield message.content
-
-         # Update conversation history after streaming completion
-         if accumulated_response:
-             self.chat_history.extend([
-                 HumanMessage(content=user_input),
-                 AIMessage(content=accumulated_response)
-             ])
+         try:
+             for chunk in self.graph.stream(initial_state):
+                 if "agent" in chunk:
+                     for message in chunk["agent"]["messages"]:
+                         if isinstance(message, AIMessage) and message.content:
+                             accumulated_response = message.content
+                             yield message.content
+
+             if accumulated_response:
+                 self.chat_history.extend([
+                     HumanMessage(content=user_input),
+                     AIMessage(content=accumulated_response)
+                 ])
+
+             self.logger.info(f"✅ Stream completed: {len(accumulated_response)} characters")
+
+         except Exception as e:
+             self.logger.error(f"❌ Error in stream: {e}")
+             self.logger.exception("Traceback:")
+             raise

      def load_conversation_history(self, messages: List[Message]):
          """
          Load conversation history from Django model instances.
-
-         This method maintains compatibility with existing Django-based conversation
-         storage while preparing the history for modern LangGraph processing.
-
-         Args:
-             messages (List[Message]): List of Django Message model instances
-                 Expected to have 'content' and 'is_bot' attributes
          """
+         self.logger.info(f"📥 Loading history: {len(messages)} messages")
          self.chat_history.clear()
          for message in messages:
              if message.is_bot:
                  self.chat_history.append(AIMessage(content=message.content))
              else:
                  self.chat_history.append(HumanMessage(content=message.content))
+         self.logger.debug("✅ History loaded")

      def save_messages(self, user_message: str, bot_response: str):
          """
          Save messages to internal conversation history.
-
-         This method provides backward compatibility for manual history management.
-
-         Args:
-             user_message (str): The user's input message
-             bot_response (str): The bot's generated response
          """
+         self.logger.debug("💾 Saving messages to internal history")
          self.chat_history.append(HumanMessage(content=user_message))
          self.chat_history.append(AIMessage(content=bot_response))

      def process_file(self, file: FileProcessorInterface):
          """
          Process and index a file for contextual retrieval.
-
-         This method maintains compatibility with existing file processing workflows
-         while leveraging FAISS for efficient similarity search.
-
-         Args:
-             file (FileProcessorInterface): File processor instance that implements getText()
-
-         Note:
-             Processed files are automatically available for context retrieval
-             in subsequent conversations without additional configuration.
          """
-         document = file.getText()
-         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-         texts = text_splitter.split_documents(document)
+         self.logger.info("📄 Processing file for indexing")
+         try:
+             document = file.getText()
+             text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+             texts = text_splitter.split_documents(document)
+
+             self.logger.debug(f"✂️ Document split into {len(texts)} chunks")

-         if self.vector_store is None:
-             self.vector_store = FAISS.from_texts(
-                 [doc.page_content for doc in texts],
-                 self.embeddings
-             )
-         else:
-             self.vector_store.add_texts([doc.page_content for doc in texts])
+             if self.vector_store is None:
+                 self.vector_store = FAISS.from_texts(
+                     [doc.page_content for doc in texts],
+                     self.embeddings
+                 )
+                 self.logger.info("✅ Vector store created")
+             else:
+                 self.vector_store.add_texts([doc.page_content for doc in texts])
+                 self.logger.info("✅ Texts added to existing vector store")
+
+         except Exception as e:
+             self.logger.error(f"❌ Error processing file: {e}")
+             self.logger.exception("Traceback:")
+             raise

      def clear_memory(self):
          """
          Clear conversation history and processed file context.
-
-         This method resets the bot to a clean state, removing all conversation
-         history and processed file context.
          """
+         self.logger.info("🗑️ Clearing memory")
          self.chat_history.clear()
          self.vector_store = None
+         self.logger.debug("✅ Memory cleared")

      def get_chat_history(self) -> List[BaseMessage]:
          """
          Retrieve a copy of the current conversation history.
-
-         Returns:
-             List[BaseMessage]: Copy of the conversation history
          """
          return self.chat_history.copy()

      def set_chat_history(self, history: List[BaseMessage]):
          """
          Set the conversation history from a list of BaseMessage instances.
-
-         Args:
-             history (List[BaseMessage]): New conversation history to set
          """
+         self.logger.info(f"📝 Setting history: {len(history)} messages")
          self.chat_history = history.copy()

      def _get_context(self, query: str) -> str:
          """
          Retrieve relevant context from processed files using similarity search.
-
-         This method performs semantic search over processed file content to find
-         the most relevant information for the current query.
-
-         Args:
-             query (str): The query to search for relevant context
-
-         Returns:
-             str: Concatenated relevant context from processed files
          """
          if self.vector_store:
+             self.logger.debug(f"🔍 Searching context for query: {query[:100]}...")
              docs = self.vector_store.similarity_search(query, k=4)
-             return "\n".join([doc.page_content for doc in docs])
-         return ""
-
-     def process_file(self, file: FileProcessorInterface):
-         """Original API - processes a file and adds it to the vector store"""
-         document = file.getText()
-         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-         texts = text_splitter.split_documents(document)
-
-         if self.vector_store is None:
-             self.vector_store = FAISS.from_texts(
-                 [doc.page_content for doc in texts],
-                 self.embeddings
-             )
-         else:
-             self.vector_store.add_texts([doc.page_content for doc in texts])
-
-     def clear_memory(self):
-         """Original API - clears the conversation memory"""
-         self.chat_history.clear()
-         self.vector_store = None
-
-     def get_chat_history(self) -> List[BaseMessage]:
-         """Original API - returns the full history"""
-         return self.chat_history.copy()
-
-     def set_chat_history(self, history: List[BaseMessage]):
-         """Original API - sets the conversation history"""
-         self.chat_history = history.copy()
-
-     def _get_context(self, query: str) -> str:
-         """Gets relevant context from processed files"""
-         if self.vector_store:
-             docs = self.vector_store.similarity_search(query, k=4)
-             return "\n".join([doc.page_content for doc in docs])
+             context = "\n".join([doc.page_content for doc in docs])
+             self.logger.debug(f"✅ Context found: {len(context)} characters")
+             return context
          return ""
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sonika-langchain-bot
- Version: 0.0.15
+ Version: 0.0.16
  Summary: Agente langchain con LLM
  Author: Erley Blanco Carvajal
  License: MIT License
@@ -1,15 +1,15 @@
  sonika_langchain_bot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sonika_langchain_bot/document_processor.py,sha256=RuHT22Zt-psoe4adFWKwBJ0gi638fq8r2S5WZoDK8fY,10979
  sonika_langchain_bot/langchain_bdi.py,sha256=ithc55azP5XSPb8AGRUrDGYnVI6I4IqpqElLNat4BAQ,7024
- sonika_langchain_bot/langchain_bot_agent.py,sha256=LlzrINl543dPwizkQ-tW47OWzud0sP18Uwb-ZhxMHeA,23968
+ sonika_langchain_bot/langchain_bot_agent.py,sha256=VfYx5HFhZvslKoC-2aH5d8iGi1VCwVkfCnWIOhiPXHA,27752
  sonika_langchain_bot/langchain_bot_agent_bdi.py,sha256=Ev0hhRQYe6kyGAHiFDhFsfu6QnTwUFaA9oB8DfNV7u4,8613
  sonika_langchain_bot/langchain_clasificator.py,sha256=GR85ZAliymBSoDa5PXB31BvJkuiokGjS2v3RLdXnzzk,1381
  sonika_langchain_bot/langchain_class.py,sha256=5anB6v_wCzEoAJRb8fV9lPPS72E7-k51y_aeiip8RAw,1114
  sonika_langchain_bot/langchain_files.py,sha256=SEyqnJgBc_nbCIG31eypunBbO33T5AHFOhQZcghTks4,381
  sonika_langchain_bot/langchain_models.py,sha256=vqSSZ48tNofrTMLv1QugDdyey2MuIeSdlLSD37AnzkI,2235
  sonika_langchain_bot/langchain_tools.py,sha256=y7wLf1DbUua3QIvz938Ek-JIMOuQhrOIptJadW8OIsU,466
- sonika_langchain_bot-0.0.15.dist-info/licenses/LICENSE,sha256=O8VZ4aU_rUMAArvYTm2bshcZ991huv_tpfB5BKHH9Q8,1064
- sonika_langchain_bot-0.0.15.dist-info/METADATA,sha256=TkIrUOf7OyjqybcPfdxsJkIAr_uKYPeh3cY1oVe8f4w,6508
- sonika_langchain_bot-0.0.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- sonika_langchain_bot-0.0.15.dist-info/top_level.txt,sha256=UsTTSZFEw2wrPSVh4ufu01e2m_E7O_QVYT_k4zCQaAE,21
- sonika_langchain_bot-0.0.15.dist-info/RECORD,,
+ sonika_langchain_bot-0.0.16.dist-info/licenses/LICENSE,sha256=O8VZ4aU_rUMAArvYTm2bshcZ991huv_tpfB5BKHH9Q8,1064
+ sonika_langchain_bot-0.0.16.dist-info/METADATA,sha256=W4D-pG07cUdhxOruzIq38scWPJAdmKdyJHcT-hvkklI,6508
+ sonika_langchain_bot-0.0.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ sonika_langchain_bot-0.0.16.dist-info/top_level.txt,sha256=UsTTSZFEw2wrPSVh4ufu01e2m_E7O_QVYT_k4zCQaAE,21
+ sonika_langchain_bot-0.0.16.dist-info/RECORD,,