sonika-langchain-bot 0.0.17__tar.gz → 0.0.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sonika-langchain-bot might be problematic.

Files changed (21)
  1. {sonika_langchain_bot-0.0.17/src/sonika_langchain_bot.egg-info → sonika_langchain_bot-0.0.19}/PKG-INFO +1 -1
  2. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/setup.py +1 -1
  3. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/langchain_bot_agent.py +184 -251
  4. sonika_langchain_bot-0.0.19/src/sonika_langchain_bot/langchain_clasificator.py +66 -0
  5. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19/src/sonika_langchain_bot.egg-info}/PKG-INFO +1 -1
  6. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/test/test.py +2 -2
  7. sonika_langchain_bot-0.0.17/src/sonika_langchain_bot/langchain_clasificator.py +0 -30
  8. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/LICENSE +0 -0
  9. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/README.md +0 -0
  10. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/setup.cfg +0 -0
  11. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/__init__.py +0 -0
  12. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/document_processor.py +0 -0
  13. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/langchain_class.py +0 -0
  14. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/langchain_files.py +0 -0
  15. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/langchain_models.py +0 -0
  16. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot/langchain_tools.py +0 -0
  17. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot.egg-info/SOURCES.txt +0 -0
  18. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot.egg-info/dependency_links.txt +0 -0
  19. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot.egg-info/requires.txt +0 -0
  20. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/src/sonika_langchain_bot.egg-info/top_level.txt +0 -0
  21. {sonika_langchain_bot-0.0.17 → sonika_langchain_bot-0.0.19}/test/test_document_processor.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sonika-langchain-bot
-Version: 0.0.17
+Version: 0.0.19
 Summary: Agente langchain con LLM
 Author: Erley Blanco Carvajal
 License: MIT License
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

 setup(
     name="sonika-langchain-bot",
-    version="0.0.17",
+    version="0.0.19",
     description="Agente langchain con LLM",
     author="Erley Blanco Carvajal",
     license="MIT License",
src/sonika_langchain_bot/langchain_bot_agent.py
@@ -40,7 +40,6 @@ class LangChainBot:
     - Thread-based conversation persistence
     - Streaming responses
     - Backward compatibility with legacy APIs
-    - Debug logging injection for production troubleshooting
     """

     def __init__(self,
@@ -61,7 +60,7 @@ class LangChainBot:
            tools (List[BaseTool], optional): Traditional LangChain tools to bind to the model
            mcp_servers (Dict[str, Any], optional): MCP server configurations for dynamic tool loading
            use_checkpointer (bool): Enable automatic conversation persistence using LangGraph checkpoints
-           logger (logging.Logger, optional): Logger instance for debugging. If None, uses silent NullHandler
+           logger (Optional[logging.Logger]): Logger instance for error tracking (silent by default if not provided)

        Note:
            The instructions will be automatically enhanced with tool descriptions
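The logger contract here is plain stdlib logging: pass any configured logging.Logger and the bot uses it; pass nothing and a NullHandler keeps it silent. A minimal caller-side sketch, assuming the keyword arguments named in this docstring (the language_model and embeddings objects are assumed to come from the package's own wrapper classes):

    import logging

    # Any standard logger works; without one the bot attaches a NullHandler.
    logger = logging.getLogger("sonika.bot")
    logger.setLevel(logging.ERROR)
    logger.addHandler(logging.StreamHandler())

    bot = LangChainBot(
        language_model=language_model,  # assumed: the package's ILanguageModel wrapper
        embeddings=embeddings,          # assumed: an embeddings instance
        instructions="You are a helpful assistant.",
        logger=logger,                  # omit to keep the bot silent
    )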
@@ -72,17 +71,11 @@ class LangChainBot:
         if logger is None:
             self.logger.addHandler(logging.NullHandler())

-        self.logger.info("="*80)
-        self.logger.info("🚀 Inicializando LangChainBot")
-        self.logger.info("="*80)
-
         # Core components
         self.language_model = language_model
         self.embeddings = embeddings
         self.base_instructions = instructions

-        self.logger.debug(f"📋 Instrucciones base: {len(instructions)} caracteres")
-
         # Backward compatibility attributes
         self.chat_history: List[BaseMessage] = []
         self.vector_store = None
@@ -91,39 +84,25 @@ class LangChainBot:
         self.tools = tools or []
         self.mcp_client = None

-        self.logger.info(f"🔧 Herramientas iniciales: {len(self.tools)}")
-
         # Initialize MCP servers if provided
         if mcp_servers:
-            self.logger.info(f"🌐 Servidores MCP detectados: {len(mcp_servers)}")
             self._initialize_mcp(mcp_servers)
-        else:
-            self.logger.debug("⚪ Sin servidores MCP configurados")

         # Configure persistence layer
         self.checkpointer = MemorySaver() if use_checkpointer else None
-        self.logger.debug(f"💾 Checkpointer: {'Habilitado' if use_checkpointer else 'Deshabilitado'}")

         # Prepare model with bound tools for native function calling
-        self.logger.info("🤖 Preparando modelo con herramientas...")
         self.model_with_tools = self._prepare_model_with_tools()

         # Build modern instruction set with tool descriptions
-        self.logger.info("📝 Construyendo instrucciones modernas...")
         self.instructions = self._build_modern_instructions()
-        self.logger.debug(f"📋 Instrucciones finales: {len(self.instructions)} caracteres")

         # Create the LangGraph workflow
-        self.logger.info("🔄 Creando workflow de LangGraph...")
         self.graph = self._create_modern_workflow()

         # Legacy compatibility attributes (maintained for API compatibility)
         self.conversation = None
         self.agent_executor = None
-
-        self.logger.info("✅ LangChainBot inicializado correctamente")
-        self.logger.info(f"📊 Resumen: {len(self.tools)} herramientas, {len(self.chat_history)} mensajes en historial")
-        self.logger.info("="*80 + "\n")

     def _initialize_mcp(self, mcp_servers: Dict[str, Any]):
         """
@@ -146,81 +125,14 @@ class LangChainBot:
         MCP tools are automatically appended to the existing tools list and
         will be included in the model's tool binding process.
         """
-        self.logger.info("="*80)
-        self.logger.info("🌐 INICIALIZANDO MCP (Model Context Protocol)")
-        self.logger.info("="*80)
-
         try:
-            self.logger.info(f"📋 Servidores a inicializar: {len(mcp_servers)}")
-
-            for server_name, server_config in mcp_servers.items():
-                self.logger.info(f"\n🔌 Servidor: {server_name}")
-                self.logger.debug(f"   Command: {server_config.get('command')}")
-                self.logger.debug(f"   Args: {server_config.get('args')}")
-                self.logger.debug(f"   Transport: {server_config.get('transport')}")
-
-            self.logger.info("\n🔄 Creando MultiServerMCPClient...")
             self.mcp_client = MultiServerMCPClient(mcp_servers)
-            self.logger.info("✅ MultiServerMCPClient creado")
-
-            # ===== FIX PARA APACHE/MOD_WSGI =====
-            self.logger.info("🔧 Aplicando fix para compatibilidad Apache/mod_wsgi...")
-
-            import subprocess
-            original_create = asyncio.create_subprocess_exec
-
-            async def fixed_create(*args, stdin=None, stdout=None, stderr=None, **kwargs):
-                """Forzar PIPE para evitar heredar sys.stderr de Apache"""
-                return await original_create(
-                    *args,
-                    stdin=stdin or subprocess.PIPE,
-                    stdout=stdout or subprocess.PIPE,
-                    stderr=stderr or subprocess.PIPE,
-                    **kwargs
-                )
-
-            # Aplicar parche temporalmente
-            asyncio.create_subprocess_exec = fixed_create
-            self.logger.debug("✅ Parche temporal aplicado a asyncio.create_subprocess_exec")
-
-            try:
-                self.logger.info("🔄 Obteniendo herramientas desde servidores MCP...")
-                mcp_tools = asyncio.run(self.mcp_client.get_tools())
-                self.logger.info(f"📥 Herramientas MCP recibidas: {len(mcp_tools)}")
-            finally:
-                # Restaurar original
-                asyncio.create_subprocess_exec = original_create
-                self.logger.debug("✅ Parche temporal removido, asyncio restaurado")
-            # =====================================
-
-            if mcp_tools:
-                for i, tool in enumerate(mcp_tools, 1):
-                    tool_name = getattr(tool, 'name', 'Unknown')
-                    tool_desc = getattr(tool, 'description', 'Sin descripción')
-                    self.logger.debug(f"   {i}. {tool_name}: {tool_desc[:100]}...")
-
+            mcp_tools = asyncio.run(self.mcp_client.get_tools())
             self.tools.extend(mcp_tools)
-
-            self.logger.info(f"✅ MCP inicializado exitosamente")
-            self.logger.info(f"📊 Total herramientas disponibles: {len(self.tools)}")
-            self.logger.info(f"   - Herramientas MCP: {len(mcp_tools)}")
-            self.logger.info(f"   - Herramientas previas: {len(self.tools) - len(mcp_tools)}")
-            self.logger.info("="*80 + "\n")
-
         except Exception as e:
-            self.logger.error("="*80)
-            self.logger.error("❌ ERROR EN INICIALIZACIÓN MCP")
-            self.logger.error("="*80)
-            self.logger.error(f"Tipo de error: {type(e).__name__}")
-            self.logger.error(f"Mensaje: {str(e)}")
+            self.logger.error(f"Error inicializando MCP: {e}")
             self.logger.exception("Traceback completo:")
-            self.logger.error("="*80 + "\n")
-
             self.mcp_client = None
-
-            # Mensaje de diagnóstico
-            self.logger.warning("⚠️ Continuando sin MCP - solo herramientas locales disponibles")
-            self.logger.warning(f"   Herramientas disponibles: {len(self.tools)}")

    def _prepare_model_with_tools(self):
        """
@@ -233,31 +145,13 @@ class LangChainBot:
            The language model with tools bound, or the original model if no tools are available
        """
        if self.tools:
-            self.logger.info(f"🔗 Vinculando {len(self.tools)} herramientas al modelo")
-            try:
-                bound_model = self.language_model.model.bind_tools(self.tools)
-                self.logger.info("✅ Herramientas vinculadas correctamente")
-                return bound_model
-            except Exception as e:
-                self.logger.error(f"❌ Error vinculando herramientas: {e}")
-                self.logger.exception("Traceback:")
-                return self.language_model.model
-        else:
-            self.logger.debug("⚪ Sin herramientas para vincular, usando modelo base")
-            return self.language_model.model
+            return self.language_model.model.bind_tools(self.tools)
+        return self.language_model.model

    def _build_modern_instructions(self) -> str:
-        """
-        Build modern instructions with automatic tool documentation.
-
-        Returns:
-            str: Enhanced instructions with tool descriptions
-        """
        instructions = self.base_instructions

        if self.tools:
-            self.logger.info(f"📝 Generando documentación para {len(self.tools)} herramientas")
-
            tools_description = "\n\n# Available Tools\n\n"

            for tool in self.tools:
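bind_tools is the standard LangChain chat-model API for native function calling, which is all the simplified method above now forwards to. A self-contained sketch of the mechanism, assuming the langchain-core @tool decorator and any chat model that supports tool binding:

    from langchain_core.tools import tool

    @tool
    def get_weather(city: str) -> str:
        """Return a short weather description for a city."""
        return f"Sunny in {city}"

    # model is assumed to be any chat model supporting tool calling;
    # bind_tools attaches each tool's JSON schema to every request.
    model_with_tools = model.bind_tools([get_weather])
    response = model_with_tools.invoke("What is the weather in Bogota?")
    print(response.tool_calls)  # e.g. [{'name': 'get_weather', 'args': {'city': 'Bogota'}, ...}]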
@@ -271,7 +165,7 @@ class LangChainBot:
                    required = "**REQUIRED**" if field_info.is_required() else "*optional*"
                    tools_description += f"- `{field_name}` ({field_info.annotation.__name__}, {required}): {field_info.description}\n"

-            # Opción 2: args_schema es un dict (MCP Tools)
+            # Opción 2: args_schema es un dict (MCP Tools) ← NUEVO
            elif hasattr(tool, 'args_schema') and isinstance(tool.args_schema, dict):
                if 'properties' in tool.args_schema:
                    tools_description += f"**Parameters:**\n"
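Given these format strings, the parameter block appended to the system prompt looks roughly like this for a single tool (illustrative only; the per-tool name and description lines are emitted by unchanged code outside this hunk, and the field names here are hypothetical):

    **Parameters:**
    - `city` (str, **REQUIRED**): Name of the city to look up
    - `units` (str, *optional*): Temperature units, e.g. "metric"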
@@ -301,7 +195,6 @@ class LangChainBot:
                "- Do NOT call tools with empty arguments\n")

            instructions += tools_description
-            self.logger.info(f"✅ Documentación de herramientas agregada ({len(tools_description)} caracteres)")

        return instructions

@@ -318,14 +211,24 @@ class LangChainBot:
        Returns:
            StateGraph: Compiled LangGraph workflow ready for execution
        """
-        self.logger.info("🔄 Construyendo workflow de LangGraph")

        def agent_node(state: ChatState) -> ChatState:
            """
            Main agent node responsible for generating responses and initiating tool calls.
-            """
-            self.logger.debug("🤖 Ejecutando agent_node")

+            This node:
+            1. Extracts the latest user message from the conversation state
+            2. Retrieves relevant context from processed files
+            3. Constructs a complete message history for the model
+            4. Invokes the model with tool binding for native function calling
+            5. Returns updated state with the model's response
+
+            Args:
+                state (ChatState): Current conversation state
+
+            Returns:
+                ChatState: Updated state with agent response
+            """
            # Extract the most recent user message
            last_user_message = None
            for msg in reversed(state["messages"]):
@@ -334,15 +237,10 @@ class LangChainBot:
                    break

            if not last_user_message:
-                self.logger.warning("⚠️ No se encontró mensaje de usuario")
                return state

-            self.logger.debug(f"💬 Mensaje usuario: {last_user_message[:100]}...")
-
            # Retrieve contextual information from processed files
            context = self._get_context(last_user_message)
-            if context:
-                self.logger.debug(f"📚 Contexto recuperado: {len(context)} caracteres")

            # Build system prompt with optional context
            system_content = self.instructions
@@ -359,33 +257,24 @@ class LangChainBot:
                elif isinstance(msg, AIMessage):
                    messages.append({"role": "assistant", "content": msg.content or ""})
                elif isinstance(msg, ToolMessage):
+                    # Convert tool results to user messages for context
                    messages.append({"role": "user", "content": f"Tool result: {msg.content}"})

-            self.logger.debug(f"📨 Enviando {len(messages)} mensajes al modelo")
-
            try:
                # Invoke model with native tool binding
                response = self.model_with_tools.invoke(messages)

-                self.logger.debug(f"✅ Respuesta recibida del modelo")
-
-                # Check for tool calls
-                if hasattr(response, 'tool_calls') and response.tool_calls:
-                    self.logger.info(f"🔧 Llamadas a herramientas detectadas: {len(response.tool_calls)}")
-                    for i, tc in enumerate(response.tool_calls, 1):
-                        tool_name = tc.get('name', 'Unknown')
-                        self.logger.debug(f"   {i}. {tool_name}")
-
                # Return updated state
                return {
                    **state,
                    "context": context,
-                    "messages": [response]
+                    "messages": [response]  # add_messages annotation handles proper appending
                }

            except Exception as e:
-                self.logger.error(f"Error en agent_node: {e}")
-                self.logger.exception("Traceback:")
+                self.logger.error(f"Error en agent_node: {e}")
+                self.logger.exception("Traceback completo:")
+                # Graceful fallback for error scenarios
                fallback_response = AIMessage(content="I apologize, but I encountered an error processing your request.")
                return {
                    **state,
@@ -396,16 +285,24 @@ class LangChainBot:
        def should_continue(state: ChatState) -> str:
            """
            Conditional edge function to determine workflow continuation.
+
+            Analyzes the last message to decide whether to execute tools or end the workflow.
+            This leverages LangGraph's native tool calling detection.
+
+            Args:
+                state (ChatState): Current conversation state
+
+            Returns:
+                str: Next node to execute ("tools" or "end")
            """
            last_message = state["messages"][-1]

+            # Check for pending tool calls using native tool calling detection
            if (isinstance(last_message, AIMessage) and
                hasattr(last_message, 'tool_calls') and
                last_message.tool_calls):
-                self.logger.debug("➡️ Continuando a ejecución de herramientas")
                return "tools"

-            self.logger.debug("🏁 Finalizando workflow")
            return "end"

        # Construct the workflow graph
@@ -413,18 +310,18 @@ class LangChainBot:

        # Add primary agent node
        workflow.add_node("agent", agent_node)
-        self.logger.debug("✅ Nodo 'agent' agregado")

        # Add tool execution node if tools are available
        if self.tools:
+            # ToolNode automatically handles tool execution and result formatting
            tool_node = ToolNode(self.tools)
            workflow.add_node("tools", tool_node)
-            self.logger.debug("✅ Nodo 'tools' agregado")

        # Define workflow edges and entry point
        workflow.set_entry_point("agent")

        if self.tools:
+            # Conditional routing based on tool call presence
            workflow.add_conditional_edges(
                "agent",
                should_continue,
@@ -433,21 +330,17 @@ class LangChainBot:
                    "end": END
                }
            )
+            # Return to agent after tool execution for final response formatting
            workflow.add_edge("tools", "agent")
-            self.logger.debug("✅ Edges condicionales configurados")
        else:
+            # Direct termination if no tools are available
            workflow.add_edge("agent", END)
-            self.logger.debug("✅ Edge directo a END configurado")

        # Compile workflow with optional checkpointing
        if self.checkpointer:
-            compiled = workflow.compile(checkpointer=self.checkpointer)
-            self.logger.info("✅ Workflow compilado con checkpointer")
+            return workflow.compile(checkpointer=self.checkpointer)
        else:
-            compiled = workflow.compile()
-            self.logger.info("✅ Workflow compilado sin checkpointer")
-
-        return compiled
+            return workflow.compile()

    # ===== LEGACY API COMPATIBILITY =====

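Stripped of the bot's internals, the graph assembled above is the canonical LangGraph agent/tools loop. A standalone sketch of the same topology, assuming langgraph's StateGraph, ToolNode, and a message-list state (model_with_tools and get_weather as in the earlier sketches):

    from typing import Annotated, TypedDict
    from langgraph.graph import StateGraph, END
    from langgraph.graph.message import add_messages
    from langgraph.prebuilt import ToolNode

    class State(TypedDict):
        messages: Annotated[list, add_messages]

    def agent(state: State) -> State:
        # One model call per turn; any tool calls ride along on the AIMessage
        return {"messages": [model_with_tools.invoke(state["messages"])]}

    def route(state: State) -> str:
        last = state["messages"][-1]
        return "tools" if getattr(last, "tool_calls", None) else "end"

    graph = StateGraph(State)
    graph.add_node("agent", agent)
    graph.add_node("tools", ToolNode([get_weather]))
    graph.set_entry_point("agent")
    graph.add_conditional_edges("agent", route, {"tools": "tools", "end": END})
    graph.add_edge("tools", "agent")  # tool results loop back for a final answer
    app = graph.compile()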
@@ -471,70 +364,59 @@ class LangChainBot:
        This method automatically handles tool execution and context integration
        from processed files while maintaining the original API signature.
        """
-        self.logger.info("="*80)
-        self.logger.info("📨 GET_RESPONSE llamado")
-        self.logger.debug(f"💬 Input: {user_input[:200]}...")
-
        # Prepare initial workflow state
        initial_state = {
            "messages": self.chat_history + [HumanMessage(content=user_input)],
            "context": ""
        }

-        self.logger.debug(f"📊 Estado inicial: {len(initial_state['messages'])} mensajes")
-
-        try:
-            # Execute the LangGraph workflow
-            self.logger.info("🔄 Ejecutando workflow...")
-            result = asyncio.run(self.graph.ainvoke(initial_state))
-            self.logger.info("✅ Workflow completado")
-
-            # Update internal conversation history
-            self.chat_history = result["messages"]
-            self.logger.debug(f"💾 Historial actualizado: {len(self.chat_history)} mensajes")
-
-            # Extract final response from the last assistant message
-            final_response = ""
-            total_input_tokens = 0
-            total_output_tokens = 0
-
-            for msg in reversed(result["messages"]):
-                if isinstance(msg, AIMessage) and msg.content:
-                    final_response = msg.content
-                    break
-
-            # Extract token usage from response metadata
-            last_message = result["messages"][-1]
-            if hasattr(last_message, 'response_metadata'):
-                token_usage = last_message.response_metadata.get('token_usage', {})
-                total_input_tokens = token_usage.get('prompt_tokens', 0)
-                total_output_tokens = token_usage.get('completion_tokens', 0)
-
-            self.logger.info(f"📊 Tokens: input={total_input_tokens}, output={total_output_tokens}")
-            self.logger.info(f"📝 Respuesta: {len(final_response)} caracteres")
-            self.logger.info("="*80 + "\n")
-
-            return ResponseModel(
-                user_tokens=total_input_tokens,
-                bot_tokens=total_output_tokens,
-                response=final_response
-            )
-
-        except Exception as e:
-            self.logger.error("="*80)
-            self.logger.error("❌ ERROR EN GET_RESPONSE")
-            self.logger.error(f"Mensaje: {str(e)}")
-            self.logger.exception("Traceback:")
-            self.logger.error("="*80 + "\n")
-            raise
+        # Execute the LangGraph workflow
+        # Siempre usar ainvoke (funciona para ambos casos)
+        result = asyncio.run(self.graph.ainvoke(initial_state))
+
+        # Update internal conversation history
+        self.chat_history = result["messages"]
+
+        # Extract final response from the last assistant message
+        final_response = ""
+        total_input_tokens = 0
+        total_output_tokens = 0
+
+        for msg in reversed(result["messages"]):
+            if isinstance(msg, AIMessage) and msg.content:
+                final_response = msg.content
+                break
+
+        # Extract token usage from response metadata
+        last_message = result["messages"][-1]
+        if hasattr(last_message, 'response_metadata'):
+            token_usage = last_message.response_metadata.get('token_usage', {})
+            total_input_tokens = token_usage.get('prompt_tokens', 0)
+            total_output_tokens = token_usage.get('completion_tokens', 0)
+
+        return ResponseModel(
+            user_tokens=total_input_tokens,
+            bot_tokens=total_output_tokens,
+            response=final_response
+        )

    def get_response_stream(self, user_input: str) -> Generator[str, None, None]:
        """
        Generate a streaming response for real-time user interaction.
-        """
-        self.logger.info("📨 GET_RESPONSE_STREAM llamado")
-        self.logger.debug(f"💬 Input: {user_input[:200]}...")

+        This method provides streaming capabilities while maintaining backward
+        compatibility with the original API.
+
+        Args:
+            user_input (str): The user's message or query
+
+        Yields:
+            str: Response chunks as they are generated
+
+        Note:
+            Current implementation streams complete responses. For token-level
+            streaming, consider using the model's native streaming capabilities.
+        """
        initial_state = {
            "messages": self.chat_history + [HumanMessage(content=user_input)],
            "context": ""
@@ -542,105 +424,156 @@ class LangChainBot:
        }

        accumulated_response = ""

-        try:
-            for chunk in self.graph.stream(initial_state):
-                if "agent" in chunk:
-                    for message in chunk["agent"]["messages"]:
-                        if isinstance(message, AIMessage) and message.content:
-                            accumulated_response = message.content
-                            yield message.content
-
-            if accumulated_response:
-                self.chat_history.extend([
-                    HumanMessage(content=user_input),
-                    AIMessage(content=accumulated_response)
-                ])
-
-            self.logger.info(f"✅ Stream completado: {len(accumulated_response)} caracteres")
-
-        except Exception as e:
-            self.logger.error(f"❌ Error en stream: {e}")
-            self.logger.exception("Traceback:")
-            raise
+        # Stream workflow execution
+        for chunk in self.graph.stream(initial_state):
+            # Extract content from workflow chunks
+            if "agent" in chunk:
+                for message in chunk["agent"]["messages"]:
+                    if isinstance(message, AIMessage) and message.content:
+                        # Stream complete responses (can be enhanced for token-level streaming)
+                        accumulated_response = message.content
+                        yield message.content
+
+        # Update conversation history after streaming completion
+        if accumulated_response:
+            self.chat_history.extend([
+                HumanMessage(content=user_input),
+                AIMessage(content=accumulated_response)
+            ])

    def load_conversation_history(self, messages: List[Message]):
        """
        Load conversation history from Django model instances.
+
+        This method maintains compatibility with existing Django-based conversation
+        storage while preparing the history for modern LangGraph processing.
+
+        Args:
+            messages (List[Message]): List of Django Message model instances
+                Expected to have 'content' and 'is_bot' attributes
        """
-        self.logger.info(f"📥 Cargando historial: {len(messages)} mensajes")
        self.chat_history.clear()
        for message in messages:
            if message.is_bot:
                self.chat_history.append(AIMessage(content=message.content))
            else:
                self.chat_history.append(HumanMessage(content=message.content))
-        self.logger.debug("✅ Historial cargado")

    def save_messages(self, user_message: str, bot_response: str):
        """
        Save messages to internal conversation history.
+
+        This method provides backward compatibility for manual history management.
+
+        Args:
+            user_message (str): The user's input message
+            bot_response (str): The bot's generated response
        """
-        self.logger.debug("💾 Guardando mensajes en historial interno")
        self.chat_history.append(HumanMessage(content=user_message))
        self.chat_history.append(AIMessage(content=bot_response))

    def process_file(self, file: FileProcessorInterface):
        """
        Process and index a file for contextual retrieval.
-        """
-        self.logger.info("📄 Procesando archivo para indexación")
-        try:
-            document = file.getText()
-            text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-            texts = text_splitter.split_documents(document)
+
+        This method maintains compatibility with existing file processing workflows
+        while leveraging FAISS for efficient similarity search.
+
+        Args:
+            file (FileProcessorInterface): File processor instance that implements getText()

-            self.logger.debug(f"✂️ Documento dividido en {len(texts)} chunks")
+        Note:
+            Processed files are automatically available for context retrieval
+            in subsequent conversations without additional configuration.
+        """
+        document = file.getText()
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+        texts = text_splitter.split_documents(document)

-            if self.vector_store is None:
-                self.vector_store = FAISS.from_texts(
-                    [doc.page_content for doc in texts],
-                    self.embeddings
-                )
-                self.logger.info("✅ Vector store creado")
-            else:
-                self.vector_store.add_texts([doc.page_content for doc in texts])
-                self.logger.info("✅ Textos agregados a vector store existente")
-
-        except Exception as e:
-            self.logger.error(f"❌ Error procesando archivo: {e}")
-            self.logger.exception("Traceback:")
-            raise
+        if self.vector_store is None:
+            self.vector_store = FAISS.from_texts(
+                [doc.page_content for doc in texts],
+                self.embeddings
+            )
+        else:
+            self.vector_store.add_texts([doc.page_content for doc in texts])

    def clear_memory(self):
        """
        Clear conversation history and processed file context.
+
+        This method resets the bot to a clean state, removing all conversation
+        history and processed file context.
        """
-        self.logger.info("🗑️ Limpiando memoria")
        self.chat_history.clear()
        self.vector_store = None
-        self.logger.debug("✅ Memoria limpiada")

    def get_chat_history(self) -> List[BaseMessage]:
        """
        Retrieve a copy of the current conversation history.
+
+        Returns:
+            List[BaseMessage]: Copy of the conversation history
        """
        return self.chat_history.copy()

    def set_chat_history(self, history: List[BaseMessage]):
        """
        Set the conversation history from a list of BaseMessage instances.
+
+        Args:
+            history (List[BaseMessage]): New conversation history to set
        """
-        self.logger.info(f"📝 Estableciendo historial: {len(history)} mensajes")
        self.chat_history = history.copy()

    def _get_context(self, query: str) -> str:
        """
        Retrieve relevant context from processed files using similarity search.
+
+        This method performs semantic search over processed file content to find
+        the most relevant information for the current query.
+
+        Args:
+            query (str): The query to search for relevant context
+
+        Returns:
+            str: Concatenated relevant context from processed files
        """
        if self.vector_store:
-            self.logger.debug(f"🔍 Buscando contexto para query: {query[:100]}...")
            docs = self.vector_store.similarity_search(query, k=4)
-            context = "\n".join([doc.page_content for doc in docs])
-            self.logger.debug(f"✅ Contexto encontrado: {len(context)} caracteres")
-            return context
+            return "\n".join([doc.page_content for doc in docs])
+        return ""
+
+    def process_file(self, file: FileProcessorInterface):
+        """API original - Procesa archivo y lo añade al vector store"""
+        document = file.getText()
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+        texts = text_splitter.split_documents(document)
+
+        if self.vector_store is None:
+            self.vector_store = FAISS.from_texts(
+                [doc.page_content for doc in texts],
+                self.embeddings
+            )
+        else:
+            self.vector_store.add_texts([doc.page_content for doc in texts])
+
+    def clear_memory(self):
+        """API original - Limpia la memoria de conversación"""
+        self.chat_history.clear()
+        self.vector_store = None
+
+    def get_chat_history(self) -> List[BaseMessage]:
+        """API original - Obtiene el historial completo"""
+        return self.chat_history.copy()
+
+    def set_chat_history(self, history: List[BaseMessage]):
+        """API original - Establece el historial de conversación"""
+        self.chat_history = history.copy()
+
+    def _get_context(self, query: str) -> str:
+        """Obtiene contexto relevante de archivos procesados"""
+        if self.vector_store:
+            docs = self.vector_store.similarity_search(query, k=4)
+            return "\n".join([doc.page_content for doc in docs])
        return ""
sonika_langchain_bot-0.0.19/src/sonika_langchain_bot/langchain_clasificator.py (new file)
@@ -0,0 +1,66 @@
+from pydantic import BaseModel
+from typing import Dict, Any, Type
+from sonika_langchain_bot.langchain_class import ILanguageModel
+
+class ClassificationResponse(BaseModel):
+    """Respuesta de clasificación con tokens utilizados"""
+    input_tokens: int
+    output_tokens: int
+    result: Dict[str, Any]
+
+class TextClassifier:
+    def __init__(self, validation_class: Type[BaseModel], llm: ILanguageModel):
+        self.llm = llm
+        self.validation_class = validation_class
+        # Guardamos ambas versiones del modelo
+        self.original_model = self.llm.model  # Sin structured output
+        self.structured_model = self.llm.model.with_structured_output(validation_class)
+
+    def classify(self, text: str) -> ClassificationResponse:
+        """
+        Clasifica el texto según la clase de validación.
+
+        Args:
+            text: Texto a clasificar
+
+        Returns:
+            ClassificationResponse: Objeto con result, input_tokens y output_tokens
+        """
+        prompt = f"""
+        Classify the following text based on the properties defined in the validation class.
+
+        Text: {text}
+
+        Only extract the properties mentioned in the validation class.
+        """
+
+        # Primero invocamos el modelo ORIGINAL para obtener metadata de tokens
+        raw_response = self.original_model.invoke(prompt)
+
+        # Extraer información de tokens del AIMessage original
+        input_tokens = 0
+        output_tokens = 0
+
+        if hasattr(raw_response, 'response_metadata'):
+            token_usage = raw_response.response_metadata.get('token_usage', {})
+            input_tokens = token_usage.get('prompt_tokens', 0)
+            output_tokens = token_usage.get('completion_tokens', 0)
+
+        # Ahora invocamos con structured output para obtener el objeto parseado
+        response = self.structured_model.invoke(prompt)
+
+        # Validar que el response es de la clase correcta
+        if isinstance(response, self.validation_class):
+            # Crear el resultado dinámicamente basado en los atributos
+            result_data = {
+                field: getattr(response, field)
+                for field in self.validation_class.__fields__.keys()
+            }
+
+            return ClassificationResponse(
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                result=result_data
+            )
+        else:
+            raise ValueError(f"The response is not of type '{self.validation_class.__name__}'")
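The new classifier is driven entirely by the pydantic class handed to it. A usage sketch, assuming an ILanguageModel implementation from the package (the llm wiring itself is hypothetical):

    from pydantic import BaseModel, Field

    class SentimentLabel(BaseModel):
        sentiment: str = Field(description="positive, negative or neutral")
        confidence: float = Field(description="confidence score between 0 and 1")

    classifier = TextClassifier(validation_class=SentimentLabel, llm=llm)  # llm: ILanguageModel
    resp = classifier.classify("how are you?")
    print(resp.result)                            # {'sentiment': ..., 'confidence': ...}
    print(resp.input_tokens, resp.output_tokens)

Note the trade-off visible in the code: classify sends the prompt twice, once through the raw model to read token_usage metadata and once through the structured model for parsing, so the reported counts cover only the first call and actual total usage is roughly double.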
src/sonika_langchain_bot.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sonika-langchain-bot
-Version: 0.0.17
+Version: 0.0.19
 Summary: Agente langchain con LLM
 Author: Erley Blanco Carvajal
 License: MIT License
test/test.py
@@ -76,6 +76,6 @@ def clasification():
    result = classifier.classify("how are you?")
    print(result)

-bot_bdi()
+#bot_bdi()
 #bot_bdi_streaming()
-#clasification()
+clasification()
sonika_langchain_bot-0.0.17/src/sonika_langchain_bot/langchain_clasificator.py (deleted)
@@ -1,30 +0,0 @@
-from pydantic import BaseModel
-from typing import Dict, Any, Type
-from sonika_langchain_bot.langchain_class import ILanguageModel
-
-# Clase para realizar la clasificación de texto
-class TextClassifier:
-    def __init__(self, validation_class: Type[BaseModel], llm: ILanguageModel):
-        self.llm =llm
-        self.validation_class = validation_class
-        #configuramos el modelo para que tenga una estructura de salida
-        self.llm.model = self.llm.model.with_structured_output(validation_class)
-
-    def classify(self, text: str) -> Dict[str, Any]:
-        # Crear el template del prompt
-        prompt = f"""
-        Classify the following text based on the properties defined in the validation class.
-
-        Text: {text}
-
-        Only extract the properties mentioned in the validation class.
-        """
-        response = self.llm.invoke(prompt=prompt)
-
-        # Asegurarse de que el `response` es de la clase de validación proporcionada
-        if isinstance(response, self.validation_class):
-            # Crear el resultado dinámicamente basado en los atributos de la clase de validación
-            result = {field: getattr(response, field) for field in self.validation_class.__fields__.keys()}
-            return result
-        else:
-            raise ValueError(f"The response is not of type '{self.validation_class.__name__}'")