ziya 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of ziya might be problematic.
- app/agents/agent.py +71 -73
- app/agents/direct_streaming.py +1 -1
- app/agents/prompts.py +1 -1
- app/agents/prompts_manager.py +14 -10
- app/agents/wrappers/google_direct.py +31 -1
- app/agents/wrappers/nova_tool_execution.py +2 -2
- app/agents/wrappers/nova_wrapper.py +1 -1
- app/agents/wrappers/ziya_bedrock.py +53 -31
- app/config/models_config.py +61 -20
- app/config/shell_config.py +5 -1
- app/extensions/prompt_extensions/claude_extensions.py +27 -5
- app/extensions/prompt_extensions/mcp_prompt_extensions.py +82 -56
- app/main.py +5 -3
- app/mcp/client.py +19 -10
- app/mcp/manager.py +68 -10
- app/mcp/tools.py +8 -9
- app/mcp_servers/shell_server.py +3 -3
- app/middleware/streaming.py +29 -41
- app/routes/file_validation.py +35 -0
- app/routes/mcp_routes.py +54 -8
- app/server.py +525 -614
- app/streaming_tool_executor.py +748 -137
- app/templates/asset-manifest.json +20 -20
- app/templates/index.html +1 -1
- app/templates/static/css/{main.0297bfee.css → main.e7109b49.css} +2 -2
- app/templates/static/css/main.e7109b49.css.map +1 -0
- app/templates/static/js/14386.65fcfe53.chunk.js +2 -0
- app/templates/static/js/14386.65fcfe53.chunk.js.map +1 -0
- app/templates/static/js/35589.0368973a.chunk.js +2 -0
- app/templates/static/js/35589.0368973a.chunk.js.map +1 -0
- app/templates/static/js/{50295.ab92f61b.chunk.js → 50295.90aca393.chunk.js} +3 -3
- app/templates/static/js/50295.90aca393.chunk.js.map +1 -0
- app/templates/static/js/55734.5f0fd567.chunk.js +2 -0
- app/templates/static/js/55734.5f0fd567.chunk.js.map +1 -0
- app/templates/static/js/58542.57fed736.chunk.js +2 -0
- app/templates/static/js/58542.57fed736.chunk.js.map +1 -0
- app/templates/static/js/{68418.2554bb1e.chunk.js → 68418.f7b4d2d9.chunk.js} +3 -3
- app/templates/static/js/68418.f7b4d2d9.chunk.js.map +1 -0
- app/templates/static/js/99948.b280eda0.chunk.js +2 -0
- app/templates/static/js/99948.b280eda0.chunk.js.map +1 -0
- app/templates/static/js/main.e075582c.js +3 -0
- app/templates/static/js/main.e075582c.js.map +1 -0
- app/utils/code_util.py +5 -2
- app/utils/context_cache.py +11 -0
- app/utils/conversation_filter.py +90 -0
- app/utils/custom_bedrock.py +43 -1
- app/utils/diff_utils/validation/validators.py +32 -22
- app/utils/file_cache.py +5 -3
- app/utils/precision_prompt_system.py +116 -0
- app/utils/streaming_optimizer.py +100 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/METADATA +3 -2
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/RECORD +59 -55
- app/templates/static/css/main.0297bfee.css.map +0 -1
- app/templates/static/js/14386.567bf803.chunk.js +0 -2
- app/templates/static/js/14386.567bf803.chunk.js.map +0 -1
- app/templates/static/js/35589.278ecda2.chunk.js +0 -2
- app/templates/static/js/35589.278ecda2.chunk.js.map +0 -1
- app/templates/static/js/50295.ab92f61b.chunk.js.map +0 -1
- app/templates/static/js/55734.90d8bd52.chunk.js +0 -2
- app/templates/static/js/55734.90d8bd52.chunk.js.map +0 -1
- app/templates/static/js/58542.08fb5cf4.chunk.js +0 -2
- app/templates/static/js/58542.08fb5cf4.chunk.js.map +0 -1
- app/templates/static/js/68418.2554bb1e.chunk.js.map +0 -1
- app/templates/static/js/99948.71670e91.chunk.js +0 -2
- app/templates/static/js/99948.71670e91.chunk.js.map +0 -1
- app/templates/static/js/main.1d79eac2.js +0 -3
- app/templates/static/js/main.1d79eac2.js.map +0 -1
- /app/templates/static/js/{50295.ab92f61b.chunk.js.LICENSE.txt → 50295.90aca393.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{68418.2554bb1e.chunk.js.LICENSE.txt → 68418.f7b4d2d9.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{main.1d79eac2.js.LICENSE.txt → main.e075582c.js.LICENSE.txt} +0 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/WHEEL +0 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/entry_points.txt +0 -0
- {ziya-0.3.0.dist-info → ziya-0.3.2.dist-info}/licenses/LICENSE +0 -0
app/server.py
CHANGED
@@ -39,7 +39,8 @@ from app.agents.agent import model, RetryingChatBedrock, initialize_langserve
 from app.agents.agent import get_or_create_agent, get_or_create_agent_executor, create_agent_chain, create_agent_executor
 from app.agents.agent import update_conversation_state, update_and_return, parse_output
 from langchain_google_genai.chat_models import ChatGoogleGenerativeAIError
-from fastapi.responses import FileResponse
+from fastapi.responses import FileResponse
+from starlette.responses import StreamingResponse
 from pydantic import BaseModel, Field

 # Direct streaming imports
@@ -81,7 +82,7 @@ from app.utils.diff_utils import apply_diff_pipeline
 from app.utils.custom_exceptions import ThrottlingException, ExpiredTokenException
 from app.utils.custom_exceptions import ValidationError
 from app.utils.file_utils import read_file_content
-from app.middleware import RequestSizeMiddleware, ModelSettingsMiddleware, ErrorHandlingMiddleware, HunkStatusMiddleware
+from app.middleware import RequestSizeMiddleware, ModelSettingsMiddleware, ErrorHandlingMiddleware, HunkStatusMiddleware, StreamingMiddleware
 from app.utils.context_enhancer import initialize_ast_if_enabled
 from fastapi.websockets import WebSocketState
 from app.middleware.continuation import ContinuationMiddleware
@@ -91,141 +92,42 @@ def build_messages_for_streaming(question: str, chat_history: List, files: List,
 Build messages for streaming using the extended prompt template.
 This centralizes message construction to avoid duplication.
 """
-
-
-
-from
-
+logger.info(f"🔍 FUNCTION_START: build_messages_for_streaming called with {len(files)} files")
+
+# Always use precision prompt system
+from app.utils.precision_prompt_system import precision_system
+from app.agents.prompts_manager import get_model_info_from_config
+
 model_info = get_model_info_from_config()
-
-
-
-
-
-
-
-
-
-}
-try:
-from app.mcp.manager import get_mcp_manager
-mcp_manager = get_mcp_manager()
-if mcp_manager.is_initialized:
-available_tools = [tool.name for tool in mcp_manager.get_all_tools()]
-mcp_context["mcp_tools_available"] = len(available_tools) > 0
-mcp_context["available_mcp_tools"] = available_tools
-except Exception as e:
-logger.warning(f"Could not get MCP tools: {e}")
-
-# Get file context
-from app.agents.agent import extract_codebase
-file_context = extract_codebase({"config": {"files": files}, "conversation_id": conversation_id})
-
-# Apply post-instructions to the question once here
-from app.utils.post_instructions import PostInstructionManager
-modified_question = PostInstructionManager.apply_post_instructions(
-query=question,
-model_name=model_info["model_name"],
-model_family=model_info["model_family"],
-endpoint=model_info["endpoint"]
-)
-
-# Get the extended prompt and format it properly
-extended_prompt = get_extended_prompt(
-model_name=model_info["model_name"],
-model_family=model_info["model_family"],
-endpoint=model_info["endpoint"],
-context=mcp_context
+request_path = "/streaming_tools"  # Default for streaming
+
+# Use precision system for 100% equivalence
+messages = precision_system.build_messages(
+request_path=request_path,
+model_info=model_info,
+files=files,
+question=question,
+chat_history=chat_history
 )
-
-
-
-
-from app.mcp.manager import get_mcp_manager
-mcp_manager = get_mcp_manager()
-if mcp_manager.is_initialized:
-tools_list = [f"- {tool.name}: {tool.description}" for tool in mcp_manager.get_all_tools()]
-except Exception as e:
-logger.warning(f"Could not get tools for template: {e}")
-
-# Build messages manually to ensure proper conversation history
-messages = []
-
-# Add system message with context
-system_content = extended_prompt.messages[0].prompt.template.format(
-codebase=file_context,
-ast_context="",
-tools="\n".join(tools_list) if tools_list else "No tools available",
-TOOL_SENTINEL_OPEN=TOOL_SENTINEL_OPEN,
-TOOL_SENTINEL_CLOSE=TOOL_SENTINEL_CLOSE
-)
-
-if use_langchain_format:
-messages.append(SystemMessage(content=system_content))
-else:
-messages.append({"role": "system", "content": system_content})
-
-# Add conversation history
-for item in chat_history:
-if isinstance(item, dict):
-role = item.get('type', item.get('role', 'human'))
-content = item.get('content', '')
-elif isinstance(item, (list, tuple)) and len(item) >= 2:
-role, content = item[0], item[1]
-else:
-continue
-
-if role in ['human', 'user']:
-if use_langchain_format:
-messages.append(HumanMessage(content=content))
-else:
-messages.append({"role": "user", "content": content})
-elif role in ['assistant', 'ai']:
-if use_langchain_format:
-messages.append(AIMessage(content=content))
-else:
-messages.append({"role": "assistant", "content": content})
-
-# Add current question
+
+logger.info(f"🎯 PRECISION_SYSTEM: Built {len(messages)} messages with {len(files)} files preserved")
+
+# Convert to LangChain format if needed
 if use_langchain_format:
-messages
-
-
-
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
+langchain_messages = []
+for msg in messages:
+if isinstance(msg, dict) and "role" in msg:
+if msg["role"] == "system":
+langchain_messages.append(SystemMessage(content=msg["content"]))
+elif msg["role"] == "user":
+langchain_messages.append(HumanMessage(content=msg["content"]))
+elif msg["role"] == "assistant":
+langchain_messages.append(AIMessage(content=msg["content"]))
+return langchain_messages
+
 return messages
-
-logger.info(f"File context length: {len(file_context)} characters")
-logger.info(f"Modified question length: {len(modified_question)} characters")
-logger.info(f"Chat history items: {len(chat_history)}")
-logger.info(f"Available tools: {len(tools_list)}")
-logger.info(f"MCP tools available: {mcp_context.get('mcp_tools_available', False)}")
-
-# Debug: Check template substitution
-logger.debug("=== TEMPLATE SUBSTITUTION DEBUG ===")
-logger.debug("Template variables being substituted:")
-logger.debug(f"- codebase length: {len(file_context)}")
-logger.debug(f"- question length: {len(modified_question)}")
-logger.debug(f"- chat_history items: {len(_format_chat_history(chat_history))}")
-logger.debug(f"- tools count: {len(tools_list)}")
-
-formatted_messages = extended_prompt.format_messages(
-codebase=file_context,
-question=modified_question,
-chat_history=_format_chat_history(chat_history),
-ast_context="",  # Will be enhanced if AST is enabled
-tools="\n".join(tools_list) if tools_list else "No tools available",
-TOOL_SENTINEL_OPEN=TOOL_SENTINEL_OPEN,
-TOOL_SENTINEL_CLOSE=TOOL_SENTINEL_CLOSE
-)
-
-# Debug: Check if template substitution caused duplication
-for i, msg in enumerate(formatted_messages):
-if hasattr(msg, 'content'):
-file_markers_count = msg.content.count('File: ')
-if file_markers_count > 0:
-logger.debug(f"Message {i} after template substitution has {file_markers_count} file markers")
-
-return formatted_messages
+

 # Dictionary to track active streaming tasks
 active_streams = {}
@@ -270,7 +172,7 @@ async def chat_endpoint(request: Request):

 # Extract data from the request
 messages = body.get('messages', [])
-question = body.get('question', '')
+question = body.get('question', '') or body.get('message', '')  # Check both question and message
 files = body.get('files', [])
 conversation_id = body.get('conversation_id')

@@ -285,6 +187,7 @@ async def chat_endpoint(request: Request):
 is_bedrock_deepseek = current_model and 'deepseek' in current_model.lower()
 is_bedrock_openai = current_model and 'openai' in current_model.lower()
 is_google_model = current_model and ('gemini' in current_model.lower() or 'google' in current_model.lower())
+# Check if direct streaming is enabled globally - use direct streaming by default for Bedrock models like 0.3.1
 use_direct_streaming = is_bedrock_claude or is_bedrock_nova or is_bedrock_deepseek or is_bedrock_openai or is_google_model

 logger.info(f"🔍 CHAT_ENDPOINT: Current model = {current_model}, is_bedrock_claude = {is_bedrock_claude}")
@@ -329,18 +232,19 @@ async def chat_endpoint(request: Request):
 elif role in ['assistant', 'ai']:
 chat_history.append({'type': 'ai', 'content': content})

-# Format the data for stream_chunks
+# Format the data for stream_chunks - LangChain expects files at top level
 formatted_body = {
 'question': question,
 'conversation_id': conversation_id,
 'chat_history': chat_history,
+'files': files,  # LangChain expects files at top level
 'config': {
 'conversation_id': conversation_id,
-'files': files
+'files': files  # Also include in config for compatibility
 }
 }

-logger.info("[CHAT_ENDPOINT]
+logger.info("[CHAT_ENDPOINT] Using StreamingToolExecutor via stream_chunks for unified execution")

 return StreamingResponse(
 stream_chunks(formatted_body),
@@ -349,6 +253,10 @@ async def chat_endpoint(request: Request):
 "Cache-Control": "no-cache",
 "Connection": "keep-alive",
 "X-Accel-Buffering": "no",
+"X-Content-Type-Options": "nosniff",
+"Transfer-Encoding": "chunked",
+"X-Nginx-Buffering": "no",
+"Proxy-Buffering": "off",
 "Access-Control-Allow-Origin": "*",
 "Access-Control-Allow-Methods": "POST, OPTIONS",
 "Access-Control-Allow-Headers": "Content-Type"
@@ -424,6 +332,9 @@ app.add_middleware(
 allow_headers=["*"],
 )

+# Add streaming middleware
+app.add_middleware(StreamingMiddleware)
+
 # Add request size middleware
 app.add_middleware(
 RequestSizeMiddleware,
@@ -510,6 +421,9 @@ if os.path.exists(static_dir):
 app.mount("/static", StaticFiles(directory=static_dir), name="static")
 logger.info(f"Mounted static files from {static_dir}")

+# Global flag to prevent multiple LangServe initializations
+_langserve_initialized = False
+
 # Initialize MCP manager on startup
 @app.on_event("startup")
 async def startup_event():
@@ -534,7 +448,7 @@ async def startup_event():
 # Reinitialize the agent chain now that MCP is available
 # Invalidate agent chain cache since MCP tools are now available
 from app.agents.models import ModelManager
-ModelManager.invalidate_agent_chain_cache()
+# ModelManager.invalidate_agent_chain_cache()  # Method doesn't exist

 # Initialize secure MCP tools
 from app.mcp.enhanced_tools import get_connection_pool as get_secure_pool
@@ -548,9 +462,10 @@ async def startup_event():
 agent = create_agent_chain(model.get_model())
 agent_executor = create_agent_executor(agent)

-#
-initialize_langserve(app, agent_executor)
-
+# COMPLETELY DISABLED: LangServe routes cause duplicate execution with /api/chat
+# initialize_langserve(app, agent_executor)
+# _langserve_initialized = True
+logger.info("LangServe completely disabled to prevent duplicate execution - using /api/chat only")
 else:
 logger.warning("MCP initialization failed or no servers configured")
 logger.info("MCP manager initialized successfully during startup")
@@ -599,104 +514,9 @@ logger.info("=== END /ziya ROUTES ===")
 # DISABLED: LangServe routes bypass custom streaming and extended context handling
 # add_routes(app, agent_executor, disabled_endpoints=["playground", "stream_log", "stream", "invoke"], path="/ziya")

-#
-@app.post("/ziya/stream_log")
-async def stream_log_endpoint(request: Request, body: dict):
-"""Stream log endpoint with proper diff parameter handling."""
-try:
-# Debug logging
-logger.info("Stream log endpoint request body:")
-
-# Extract and store diff parameter if present
-diff_content = None
-if 'diff' in body:
-diff_content = body['diff']
-# Create a copy of the body without the diff parameter
-body_copy = {k: v for k, v in body.items() if k != 'diff'}
-else:
-body_copy = body
-
-# Extract input from body if present
-if 'input' in body_copy:
-input_data = body_copy['input']
-
-# Get the question from input_data
-question = input_data.get('question', 'EMPTY')
-logger.info(f"Question from input: '{question}'")
-
-# Handle chat_history
-chat_history = input_data.get('chat_history', [])
-if not isinstance(chat_history, list):
-logger.warning(f"Chat history is not a list: {type(chat_history)}")
-chat_history = []
-
-# Log chat history details for debugging
-logger.info(f"Chat history length: {len(chat_history)}")
-for i, msg in enumerate(chat_history):
-if isinstance(msg, dict):
-logger.info(f"Input chat history item {i}: type={msg.get('type', 'unknown')}")
-else:
-logger.info(f"Input chat history item {i}: type={type(msg)}")
-
-input_data['chat_history'] = chat_history
-
-# Handle config and files
-config = input_data.get('config', {})
-files = []
-if isinstance(config, dict):
-files = config.get("files", [])
-elif isinstance(config, list):
-logger.warning("Config is a list, assuming it's the files list")
-files = config
-
-if not isinstance(files, list):
-logger.warning(f"Files is not a list: {type(files)}")
-files = []
-
-# Count string files for summary logging
-string_file_count = sum(1 for f in files if isinstance(f, str))
-if string_file_count > 0:
-logger.info(f"Files count: {len(files)} ({string_file_count} are strings)")
-else:
-logger.info(f"Files count: {len(files)}")
-# Don't log individual file details here - too verbose
-
-# Update input_data with normalized values
-input_data['chat_history'] = chat_history
-input_data['config'] = {'files': files} if isinstance(config, list) else config
-
-# Ensure we use the current question from input_data
-input_data['question'] = question
-body_copy = input_data
-
-# Use direct streaming with StreamingResponse
-return StreamingResponse(
-stream_chunks(body_copy),
-media_type="text/event-stream",
-headers={
-"Cache-Control": "no-cache",
-"Connection": "keep-alive",
-"Access-Control-Allow-Origin": "*",
-"Access-Control-Allow-Headers": "*",
-"Content-Type": "text/event-stream"
-}
-)
-except Exception as e:
-logger.error(f"Error in stream_log_endpoint: {str(e)}")
-# Return error as streaming response
-error_json = json.dumps({"error": str(e)})
-return StreamingResponse(
-(f"data: {error_json}\n\ndata: {json.dumps({'done': True})}\n\n" for _ in range(1)),
-media_type="text/event-stream",
-headers={
-"Cache-Control": "no-cache",
-"Connection": "keep-alive",
-"Access-Control-Allow-Origin": "*",
-"Access-Control-Allow-Headers": "*",
-"Content-Type": "text/event-stream"
-}
-)
-
+# DISABLED: Manual /ziya endpoints conflict with /api/chat
+# @app.post("/ziya/stream_log")
+# async def stream_log_endpoint(request: Request, body: dict):
 async def cleanup_stream(conversation_id: str):
 """Clean up resources when a stream ends or is aborted."""
 if conversation_id in active_streams:
@@ -985,10 +805,11 @@ async def handle_continuation(continuation_state: Dict[str, Any]):

 # Add a marker for the continuation start
 continuation_start_marker = "**📝 Continuing from previous response...**\n\n"
-yield f"data: {json.dumps({'
+yield f"data: {json.dumps({'content': continuation_start_marker})}\n\n"

-# Add continuation prompt
-
+# Add continuation prompt with tool execution context
+continuation_prompt_with_context = f"{continuation_prompt}\n\nIMPORTANT: Do not simulate or hallucinate tool calls. Only use actual tool execution when needed."
+updated_messages.append(HumanMessage(content=continuation_prompt_with_context))

 # Stream continuation with clean buffer
 async for chunk in stream_continuation(updated_messages, continuation_state):
@@ -997,8 +818,7 @@ async def handle_continuation(continuation_state: Dict[str, Any]):
 except Exception as e:
 logger.error(f"🔄 CONTINUATION: Error in continuation {continuation_id}: {e}")
 # Yield error and complete the stream
-
-yield f"data: {json.dumps({'ops': [error_chunk]})}\n\n"
+yield f"data: {json.dumps({'error': f'Continuation error: {str(e)}'})}\n\n"
 finally:
 # Clean up continuation state
 with _continuation_lock:
@@ -1038,8 +858,7 @@ async def stream_continuation(messages: List, continuation_state: Dict[str, Any]
 content_str = str(content) if content else ""

 if content_str:
-
-yield f"data: {json.dumps({'ops': ops})}\n\n"
+yield f"data: {json.dumps({'content': content_str})}\n\n"

 yield f"data: {json.dumps({'done': True})}\n\n"

@@ -1053,22 +872,126 @@ async def stream_chunks(body):
 logger.error("🔍 EXECUTION_TRACE: stream_chunks() called - ENTRY POINT")
 logger.info("🔍 STREAM_CHUNKS: Function called")

-#
-
-
+# Temporarily reduce context to test tool execution
+if body.get("question") and "distribution by file type" in body.get("question", "").lower():
+logger.info("🔍 TEMP: Reducing context for tool execution test")
+if "config" in body and "files" in body["config"]:
+body["config"]["files"] = []  # Skip file context to avoid throttling

-#
+# Restore 0.3.0 direct streaming behavior
 use_direct_streaming = True
-
+
+logger.debug(f"🔍 STREAM_CHUNKS: use_direct_streaming = {use_direct_streaming}")

 logger.info(f"🚀 DIRECT_STREAMING: Environment check = {use_direct_streaming}")
-logger.info(f"🚀 DIRECT_STREAMING: Import-time config = {USE_DIRECT_STREAMING}")
 logger.info(f"🚀 DIRECT_STREAMING: ZIYA_USE_DIRECT_STREAMING env var = '{os.getenv('ZIYA_USE_DIRECT_STREAMING', 'NOT_SET')}'")

 # Check if we should use direct streaming
 if use_direct_streaming:
-logger.info("🚀 DIRECT_STREAMING: Using
-
+logger.info("🚀 DIRECT_STREAMING: Using StreamingToolExecutor for direct streaming")
+logger.info(f"🔍 REQUEST_DEBUG: body keys = {list(body.keys())}")
+logger.info(f"🔍 REQUEST_DEBUG: body = {body}")
+
+# Extract data from body for StreamingToolExecutor
+question = body.get("question", "")
+chat_history = body.get("chat_history", [])
+files = body.get("config", {}).get("files", [])
+conversation_id = body.get("conversation_id")
+
+logger.info(f"🔍 DIRECT_STREAMING_DEBUG: question='{question}', chat_history={len(chat_history)}, files={len(files)}")
+
+if question:
+try:
+from app.streaming_tool_executor import StreamingToolExecutor
+from app.agents.models import ModelManager
+
+# Get current model state
+state = ModelManager.get_state()
+current_region = state.get('aws_region', 'us-east-1')
+aws_profile = state.get('aws_profile', 'default')
+endpoint = os.environ.get("ZIYA_ENDPOINT", "bedrock")
+
+# Only use StreamingToolExecutor for Bedrock models
+if endpoint != 'bedrock':
+logger.info(f"🚀 DIRECT_STREAMING: Endpoint {endpoint} not supported by StreamingToolExecutor, falling back to LangChain")
+raise ValueError(f"StreamingToolExecutor only supports bedrock endpoint, got {endpoint}")
+
+logger.info(f"🔍 DIRECT_STREAMING_DEBUG: About to call build_messages_for_streaming with {len(files)} files")
+# Build messages with full context using the same function as LangChain path - use langchain format like 0.3.0
+logger.info(f"🔍 CALLING_BUILD_MESSAGES: About to call build_messages_for_streaming")
+messages = build_messages_for_streaming(question, chat_history, files, conversation_id, use_langchain_format=True)
+logger.info(f"🔍 DIRECT_STREAMING_PATH: Built {len(messages)} messages with full context")
+
+# Debug the system message content
+if messages and hasattr(messages[0], 'content'):
+system_content_length = len(messages[0].content)
+logger.info(f"🔍 DIRECT_STREAMING_DEBUG: System message length = {system_content_length}")
+logger.info(f"🔍 DIRECT_STREAMING_DEBUG: System message preview = {messages[0].content[:200]}...")
+
+executor = StreamingToolExecutor(profile_name=aws_profile, region=current_region)
+logger.info(f"🚀 DIRECT_STREAMING: Created StreamingToolExecutor with profile={aws_profile}, region={current_region}")
+
+# Send initial heartbeat
+yield f"data: {json.dumps({'heartbeat': True, 'type': 'heartbeat'})}\n\n"
+
+chunk_count = 0
+async for chunk in executor.stream_with_tools(messages, conversation_id=conversation_id):
+chunk_count += 1
+
+# Convert to expected format and yield all chunk types
+if chunk.get('type') == 'text':
+content = chunk.get('content', '')
+yield f"data: {json.dumps({'content': content})}\n\n"
+elif chunk.get('type') == 'tool_start':
+# Stream tool start notification
+yield f"data: {json.dumps({'tool_start': chunk})}\n\n"
+elif chunk.get('type') == 'tool_display':
+logger.info(f"🔍 TOOL_DISPLAY: {chunk.get('tool_name')} completed")
+# Stream tool result
+yield f"data: {json.dumps({'tool_result': chunk})}\n\n"
+elif chunk.get('type') == 'tool_execution':  # Legacy support
+logger.info(f"🔍 TOOL_EXECUTION (legacy): {chunk.get('tool_name')} completed")
+elif chunk.get('type') == 'stream_end':
+break
+elif chunk.get('type') == 'error':
+yield f"data: {json.dumps({'error': chunk.get('content', 'Unknown error')})}\n\n"
+elif chunk.get('type') == 'tool_result_for_model':
+# Don't stream to frontend - this is for model conversation only
+logger.debug(f"Tool result for model conversation: {chunk.get('tool_use_id')}")
+elif chunk.get('type') == 'iteration_continue':
+# Send heartbeat to flush stream before next iteration
+yield f"data: {json.dumps({'heartbeat': True, 'type': 'heartbeat'})}\n\n"
+else:
+logger.debug(f"Unknown chunk type: {chunk.get('type')}")
+
+# Always send done message at the end
+yield f"data: {json.dumps({'done': True})}\n\n"
+
+logger.info(f"🚀 DIRECT_STREAMING: Completed streaming with {chunk_count} chunks")
+return
+
+except ValueError as ve:
+# Expected error for non-Bedrock endpoints - fall through to LangChain silently
+logger.info(f"🚀 DIRECT_STREAMING: {ve} - falling back to LangChain")
+except Exception as e:
+import traceback
+error_details = traceback.format_exc()
+logger.error(f"🚀 DIRECT_STREAMING: Error in StreamingToolExecutor: {e}")
+logger.error(f"🚀 DIRECT_STREAMING: Full traceback:\n{error_details}")
+# Fall through to LangChain path
+
+logger.info("🚀 DIRECT_STREAMING: No question found or error occurred, falling back to LangChain")
+
+# Build messages properly for non-Bedrock models
+question = body.get("question", "")
+chat_history = body.get("chat_history", [])
+files = body.get("config", {}).get("files", [])
+conversation_id = body.get("conversation_id")
+
+if question:
+messages = build_messages_for_streaming(question, chat_history, files, conversation_id, use_langchain_format=True)
+logger.info(f"🔍 LANGCHAIN_PATH: Built {len(messages)} messages for non-Bedrock model")
+else:

 # Extract messages from body
 messages = []
@@ -1134,7 +1057,27 @@ async def stream_chunks(body):

 # Format the system message
 formatted_system_content = system_content.replace('{codebase}', codebase_content)
-
+
+# Check if MCP is actually enabled and has tools
+mcp_tools_text = "No tools available"
+# Check if MCP is enabled before loading tools
+if os.environ.get("ZIYA_ENABLE_MCP", "true").lower() in ("true", "1", "yes"):
+try:
+mcp_manager = get_mcp_manager()
+if mcp_manager.is_initialized:
+available_tools = mcp_manager.get_all_tools()
+if available_tools:
+mcp_tools_text = f"MCP tools available: {', '.join([tool.name for tool in available_tools])}"
+else:
+mcp_tools_text = "MCP initialized but no tools available"
+else:
+mcp_tools_text = "MCP tools disabled"
+except Exception as e:
+mcp_tools_text = "MCP tools unavailable"
+else:
+mcp_tools_text = "MCP tools disabled"
+
+formatted_system_content = formatted_system_content.replace('{tools}', mcp_tools_text)

 messages.append({'type': 'system', 'content': formatted_system_content})

@@ -1164,66 +1107,72 @@ async def stream_chunks(body):
 logger.debug(f"First message type: {messages[0].get('type', 'unknown')}")
 logger.debug(f"System message length: {len(messages[0].get('content', '')) if messages[0].get('type') == 'system' else 'N/A'}")
 # Create DirectStreamingAgent and stream
-try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+# try:
+# agent = DirectStreamingAgent()
+#
+# chunk_count = 0
+# tool_results_attempted = 0
+# total_data_sent = 0
+#
+# # Get available tools to pass to the agent
+# from app.mcp.enhanced_tools import create_secure_mcp_tools
+# mcp_tools = create_secure_mcp_tools()
+# logger.info(f"🚀 DIRECT_STREAMING: Passing {len(mcp_tools)} tools to DirectStreamingAgent")
+#
+# async for chunk in agent.stream_with_tools(messages, tools=mcp_tools, conversation_id=body.get('conversation_id')):
+# chunk_count += 1
+#
+# if chunk.get('type') == 'tool_execution':
+# tool_results_attempted += 1
+# logger.info(f"🔍 ATTEMPTING_TOOL_TRANSMISSION: #{tool_results_attempted} - {chunk.get('tool_name')}")
+#
+# # DEBUGGING: Test JSON serialization before transmission
+# try:
+# test_json = json.dumps(chunk)
+# json_size = len(test_json)
+# logger.info(f"🔍 JSON_SERIALIZATION: {chunk.get('tool_name')} serialized to {json_size} chars")
+#
+# if json_size > 100000: # 100KB
+# logger.warning(f"🔍 LARGE_JSON_PAYLOAD: {chunk.get('tool_name')} JSON is {json_size} chars")
+# if json_size > 1000000: # 1MB
+# logger.error(f"🔍 JSON_TOO_LARGE: {chunk.get('tool_name')} JSON is {json_size} chars - may break transmission")
+#
+# except Exception as json_error:
+# logger.error(f"🔍 JSON_SERIALIZATION_FAILED: {chunk.get('tool_name')} failed to serialize: {json_error}")
+# continue # Skip this chunk
+#
+# sse_data = f"data: {json.dumps(chunk)}\n\n"
+# chunk_size = len(sse_data)
+# total_data_sent += chunk_size
+#
+# # Log large chunks or tool results
+# if chunk.get('type') == 'tool_execution' or chunk_size > 1000:
+# logger.info(f"🔍 CHUNK_TRANSMISSION: chunk #{chunk_count}, type={chunk.get('type')}, size={chunk_size}, total_sent={total_data_sent}")
+# if chunk.get('type') == 'tool_execution':
+# logger.info(f"🔍 TOOL_CHUNK: tool_name={chunk.get('tool_name')}, result_size={len(chunk.get('result', ''))}")
+#
+# yield sse_data
+#
+# # Force immediate delivery for tool results
+# if chunk.get('type') == 'tool_execution':
+# import sys
+# sys.stdout.flush()
+#
+# yield "data: [DONE]\n\n"
+# return
+# except CredentialRetrievalError as e:
+# # Handle credential errors (including mwinit failures) with proper SSE error response
+# from app.utils.error_handlers import handle_streaming_error
+# async for error_chunk in handle_streaming_error(None, e):
+# yield error_chunk
+# return
+# except ValueError as e:
+# if "OpenAI models should use LangChain path" in str(e):
+# logger.info("🚀 DIRECT_STREAMING: OpenAI model detected, falling back to LangChain path")
+# # Fall through to LangChain path below
+# else:
+# raise
+pass  # DirectStreamingAgent disabled

 # Check if model should use LangChain path instead of StreamingToolExecutor
 from app.agents.models import ModelManager
@@ -1288,8 +1237,7 @@ async def stream_chunks(body):
 if hasattr(chunk, 'content') and chunk.content:
 content_str = chunk.content
 if content_str:
-
-yield f"data: {json.dumps({'ops': ops})}\n\n"
+yield f"data: {json.dumps({'content': content_str})}\n\n"

 yield f"data: {json.dumps({'done': True})}\n\n"
 return
@@ -1298,12 +1246,9 @@ async def stream_chunks(body):
 logger.error(f"🚀 DIRECT_STREAMING: Error in OpenAI message construction: {e}")
 # Fall through to regular LangChain path
 else:
-#
-
-
-chat_history = body.get("chat_history", [])
-config_data = body.get("config", {})
-files = config_data.get("files", [])
+# DISABLED: Redundant StreamingToolExecutor path - causes duplicate execution
+logger.info("🚀 DIRECT_STREAMING: Skipping redundant StreamingToolExecutor path - using primary path only")
+pass

 # Debug: Log what we received
 logger.debug(f"Received question: '{question}'")
@@ -1344,43 +1289,42 @@ async def stream_chunks(body):

 # Get available tools including MCP tools
 tools = []
-
-
-
-logger.debug(
-
-
-
-
-
-
-tools
-
-
-
-
-
-
-
+
+# Check if MCP is enabled before loading tools
+if not os.environ.get("ZIYA_ENABLE_MCP", "true").lower() in ("true", "1", "yes"):
+logger.debug("MCP is disabled, no tools will be loaded")
+else:
+try:
+from app.mcp.manager import get_mcp_manager
+mcp_manager = get_mcp_manager()
+logger.debug(f"MCP manager initialized: {mcp_manager.is_initialized}")
+if mcp_manager.is_initialized:
+# Convert MCP tools to Bedrock format
+mcp_tools = mcp_manager.get_all_tools()
+logger.debug(f"Found {len(mcp_tools)} MCP tools")
+for tool in mcp_tools:
+logger.debug(f"MCP tool: {tool.name}")
+tools.append({
+'name': tool.name,
+'description': tool.description,
+'input_schema': getattr(tool, 'inputSchema', getattr(tool, 'input_schema', {}))
+})
+except Exception as e:
+logger.debug(f"MCP tool loading error: {e}")
+logger.warning(f"Could not get MCP tools: {e}")

 # Add shell tool if no MCP tools available
 if not tools:
 logger.debug("No MCP tools found, using shell tool")
-from app.agents.direct_streaming import get_shell_tool_schema
-tools = [get_shell_tool_schema()]
+# from app.agents.direct_streaming import get_shell_tool_schema
+# tools = [get_shell_tool_schema()]
+logger.debug("Shell tool functionality not available")
 else:
 logger.debug(f"Using {len(tools)} tools: {[t['name'] for t in tools]}")

-#
-async for chunk in executor.stream_with_tools(messages, tools):
-
-if chunk.get('type') == 'tool_start':
-logger.info(f"🔧 SERVER: Yielding tool_start chunk: {chunk}")
-elif chunk.get('type') == 'tool_execution':
-logger.info(f"🔧 SERVER: Yielding tool_execution chunk: {chunk.get('tool_name')}")
-yield f"data: {json.dumps(chunk)}\n\n"
-
-# Return after successful streaming
+# DISABLED: Redundant StreamingToolExecutor call - causes duplicate execution
+# async for chunk in executor.stream_with_tools(messages, tools):
+logger.info("🚀 DIRECT_STREAMING: Skipping redundant StreamingToolExecutor call")
 return

 except Exception as e:
@@ -1460,16 +1404,9 @@ async def stream_chunks(body):

 logger.debug(f"Built {len(messages)} messages for Nova StreamingToolExecutor")

-#
-async for chunk in executor.stream_with_tools(messages):
-
-if chunk.get('type') == 'tool_start':
-logger.info(f"🔧 SERVER_NOVA: Yielding tool_start chunk: {chunk}")
-elif chunk.get('type') == 'tool_execution':
-logger.info(f"🔧 SERVER_NOVA: Yielding tool_execution chunk: {chunk.get('tool_name')}")
-yield f"data: {json.dumps(chunk)}\n\n"
-
-yield f"data: {json.dumps({'done': True})}\n\n"
+# DISABLED: Redundant Nova StreamingToolExecutor call - causes duplicate execution
+# async for chunk in executor.stream_with_tools(messages):
+logger.info("🚀 DIRECT_STREAMING: Skipping redundant Nova StreamingToolExecutor call")
 return

 except Exception as e:
@@ -1639,8 +1576,8 @@ async def stream_chunks(body):
 break

 if agent_chain:
-logger.info("🔍 STREAM_CHUNKS: Using
-# Use
+logger.info("🔍 STREAM_CHUNKS: Using agent chain with file context")
+# Use agent chain with proper file context
 try:
 input_data = {
 "question": question,
@@ -1648,7 +1585,7 @@ async def stream_chunks(body):
 "chat_history": chat_history,
 "config": {
 "conversation_id": conversation_id,
-"files":
+"files": files  # Include the actual files
 }
 }

@@ -1656,13 +1593,13 @@ async def stream_chunks(body):
 response_content = result.get("output", "")

 # Stream the response
-yield f"data: {json.dumps({'type': 'text', 'content': response_content})}
-yield f"data: {json.dumps({'type': 'done'})}
+yield f"data: {json.dumps({'type': 'text', 'content': response_content})}\n\n"
+yield f"data: {json.dumps({'type': 'done'})}\n\n"
 return

 except Exception as e:
-logger.error(f"
-# Fall back to
+logger.error(f"Agent chain failed: {e}")
+# Fall back to direct model approach

 # Use the messages that were already built correctly above with build_messages_for_streaming()
 # Don't rebuild them here - this was causing the context history loss for OpenAI models
@@ -1698,6 +1635,8 @@ async def stream_chunks(body):

 token_throttling_retries = 0
 max_token_throttling_retries = 2  # Allow 2 fresh connection attempts
+within_stream_retries = 0
+max_within_stream_retries = 3  # Quick retries within same stream first

 # Context overflow detection state
 overflow_checked = False
@@ -1714,6 +1653,8 @@ async def stream_chunks(body):
 logger.info(f"🔍 STREAM_CHUNKS: Created {len(mcp_tools)} MCP tools for iteration")
 except Exception as e:
 logger.warning(f"Failed to get MCP tools for iteration: {e}")
+# Allow tool calls to complete - only stop at the END of tool calls
+model_with_stop = model_instance.bind(stop=["</TOOL_SENTINEL>"])
 logger.info(f"🔍 STREAM_CHUNKS: model_with_stop type: {type(model_with_stop)}")

 # Agent iteration loop for tool execution
@@ -1732,10 +1673,11 @@ async def stream_chunks(body):

 current_response = ""
 tool_executed = False
+tool_execution_completed = False  # Initialize the variable

 try:
-# Use model
-model_to_use =
+# Use model instance for tool detection
+model_to_use = model_instance
 logger.info(f"🔍 AGENT ITERATION {iteration}: Available tools: {[tool.name for tool in mcp_tools] if mcp_tools else 'No tools'}")

 # Track if we're currently inside a tool call across chunks
@@ -1743,12 +1685,16 @@ async def stream_chunks(body):
 tool_call_buffer = ""
 tool_call_detected = False  # Flag to suppress ALL output after tool detection
 pending_tool_execution = False  # Flag to indicate we need to execute tools
-buffered_content = ""  # Buffer ALL content after tool call detection
-tool_execution_completed = False  # Track if we've executed and need model to continue

-#
-
-
+# DISABLED for Bedrock: LangChain streaming path - causes duplicate execution with StreamingToolExecutor
+# But ENABLED for non-Bedrock endpoints like Google
+endpoint = os.environ.get("ZIYA_ENDPOINT", "bedrock")
+if endpoint == "bedrock":
+logger.info("🚀 DIRECT_STREAMING: LangChain path disabled for Bedrock - using StreamingToolExecutor only")
+return
+
+# Stream from model for non-Bedrock endpoints (use simple streaming like 0.3.0)
+async for chunk in model_instance.astream(messages):
 # Log the actual messages being sent to model on first iteration
 if iteration == 1 and not hasattr(stream_chunks, '_logged_model_input'):
 stream_chunks._logged_model_input = True
@@ -1772,6 +1718,23 @@ async def stream_chunks(body):
 if not connection_active:
 logger.info("Connection lost during agent iteration")
 break
+
+# Handle dict chunks from DirectGoogleModel
+if isinstance(chunk, dict):
+if chunk.get('type') == 'text':
+content_str = chunk.get('content', '')
+if content_str:
+current_response += content_str
+ops = [{"op": "add", "path": "/streamed_output_str/-", "value": content_str}]
+yield f"data: {json.dumps({'ops': ops})}\n\n"
+chunk_count += 1
+elif chunk.get('type') == 'error':
+error_msg = chunk.get('content', 'Unknown error')
+yield f"data: {json.dumps({'error': error_msg})}\n\n"
+yield f"data: {json.dumps({'done': True})}\n\n"
+return
+continue
+
 # Process chunk content - always process chunks, don't check for 'content' attribute first

 # Check if this is an error response chunk
@@ -1825,11 +1788,10 @@ async def stream_chunks(body):
 # Stream the completed part

 # Add visual marker that continuation is happening
-
-yield f"data: {json.dumps({'
+marker_msg = "\n\n---\\n**⏳ Response is long, preparing continuation...**\\n---\n\n"
+yield f"data: {json.dumps({'content': marker_msg})}\n\n"

-
-yield f"data: {json.dumps({'ops': completed_ops})}\n\n"
+yield f"data: {json.dumps({'content': overflow_info['completed_response']})}\n\n"

 # Start continuation
 async for continuation_chunk in handle_continuation(overflow_info):
@@ -1853,21 +1815,17 @@ async def stream_chunks(body):
 ops = [{"op": "add", "path": "/reasoning_content/-", "value": reasoning}]
 yield f"data: {json.dumps({'ops': ops})}\n\n"

-#
-#
-if
+# Check for complete tool calls - need both opening and closing sentinels
+# and proper structure with name and arguments
+if ("<TOOL_SENTINEL>" in current_response and
+"</TOOL_SENTINEL>" in current_response and
+"<name>" in current_response and
+"</name>" in current_response and
+"<arguments>" in current_response and
+"</arguments>" in current_response):
 tool_call_detected = True
-logger.info(f"🔍 STREAM:
-
-# This prevents the model from generating multiple tool calls
-logger.info("🔍 STREAM: BREAKING IMMEDIATELY after detecting tool marker")
-# Force close the stream to prevent hanging
-if hasattr(stream_generator, 'aclose'):
-try:
-await stream_generator.aclose()
-except:
-pass
-break  # Exit the streaming loop immediately
+logger.info(f"🔍 STREAM: Complete tool call detected, stopping stream")
+break

 # If we've just executed tools, the model should now be generating the response
 if tool_execution_completed:
@@ -1883,11 +1841,26 @@ async def stream_chunks(body):
 if TOOL_SENTINEL_OPEN in content_str:
 inside_tool_call = True
 tool_call_buffer = ""
-# Stream any content before the tool call
+# Stream any content before the tool call
 before_tool = content_str[:content_str.find(TOOL_SENTINEL_OPEN)]
 if before_tool:
-
-
+text_msg = {
+'type': 'text',
+'content': before_tool
+}
+yield f"data: {json.dumps(text_msg)}\n\n"
+import asyncio
+await asyncio.sleep(0.01)  # Longer delay to prevent batching
+
+# Send tool_start message
+tool_start_msg = {
+'type': 'tool_start',
+'message': 'Tool execution starting...'
+}
+yield f"data: {json.dumps(tool_start_msg)}\n\n"
+logger.info("🔍 STREAM: Sent tool_start message to frontend")
+await asyncio.sleep(0.01)  # Delay after tool_start
+
 tool_call_detected = True  # Set flag to suppress all further output
 buffered_content = ""  # Start buffering from tool call
 logger.info("🔍 STREAM: Entering tool call - suppressing all output")
@@ -1906,7 +1879,13 @@ async def stream_chunks(body):
 continue

 else:
-
+# Extract content properly from LangChain chunks
+if hasattr(chunk, 'content'):
+content_str = chunk.content
+elif hasattr(chunk, 'text'):
+content_str = chunk.text
+else:
+content_str = ""
 if not content_str:
 continue

@@ -1950,73 +1929,40 @@ async def stream_chunks(body):
                     # Check if the current code block is a diff block
                     is_in_diff_block = in_code_block and '```diff' in current_response

+                    # Ultra-aggressive tool suppression - catch any fragment that could be part of a tool call
                     should_suppress = (
-                        # Only suppress if not in a diff block AND matches suppression patterns
                         not is_in_diff_block and (
                             inside_tool_call or
-                            TOOL_SENTINEL_OPEN in
-
-
-
-
-
-                            '
-                            '<
-                            '</
-
-                            '
-                            '
-                            '
-                            '
-
-                            '
-
-                            ('
-                            ('": "' in content_str and current_response.count(TOOL_SENTINEL_OPEN) > 0) or  # Catch ": "1"
-                            ('"pwd"' in content_str and current_response.count(TOOL_SENTINEL_OPEN) > 0) or
-                            "mcp_" in content_str and ("\"command\"" in content_str or "\"format\"" in content_str or "\"timeout\"" in content_str) or
-                            # Enhanced tool call detection
-                            content_str.strip().startswith("mcp_") or
-                            "mcp_run" in content_str or
-                            "mcp_get" in content_str or
-                            # Catch argument patterns (only suppress if inside tool call)
-                            ("\"comman" in content_str) or  # Catch partial "command"
-                            ("d\": \"" in content_str and tool_call_detected) or  # Catch command value
-                            ("pwd\"" in content_str and tool_call_detected) or  # Catch pwd command
-                            ("timeout" in content_str and tool_call_detected) or  # Catch timeout
-                            (": \"" in content_str and tool_call_detected) or  # Catch JSON patterns
-                            ("\"command\":" in content_str and TOOL_SENTINEL_OPEN in current_response and not tool_executed) or
-                            ("\"format\":" in content_str and TOOL_SENTINEL_OPEN in current_response and not tool_executed) or
-                            ("\"timeout\":" in content_str and TOOL_SENTINEL_OPEN in current_response and not tool_executed) or
-                            # Catch partial tool fragments that leak through
-                            content_str.strip().endswith(">mcp_") or
-                            content_str.strip().endswith("1>") or
-                            # Catch mixed tool content
-                            ("mcp_" in content_str and any(char in content_str for char in ["\"", ":", "{", "}"])) or
-                            any(marker in content_str for marker in ["<name>", "</name>", "<arguments>", "</arguments>"]) or
-                            # NEW: Catch specific leaked fragments we're seeing in frontend
-                            "_run_shell_command" in content_str or
-                            "_run_" in content_str or
-                            "shell_command" in content_str or
-                            "</name" in content_str or
-                            "command\":" in content_str or
-                            "timeout\":" in content_str or
-                            # Catch JSON-like fragments
-                            (content_str.strip().startswith('\"') and ('command' in content_str or 'timeout' in content_str)) or
-                            # Catch partial XML closing tags
-                            content_str.strip().endswith('</') or
-                            content_str.strip().startswith('</') or
-                            # Catch comma-separated values that look like JSON
-                            (content_str.strip().endswith(',') and ('command' in content_str or 'timeout' in content_str)) or
-                            # CRITICAL: Suppress ALL content after tool calls but before execution
-                            # This prevents hallucinated responses from leaking through
-                            has_pending_tools
+                            TOOL_SENTINEL_OPEN in current_response or  # If we've seen the start of a tool call anywhere
+                            '<TOOL' in content_str or  # Catch partial tool sentinels
+                            'TOOL_' in content_str or  # Catch fragments like "_modules.\n\n<TOOL_"
+                            '</TOOL' in content_str or
+                            'SENTINEL' in content_str or
+                            '<name>' in content_str or
+                            '</name>' in content_str or
+                            '<arguments>' in content_str or
+                            '</arguments>' in content_str or
+                            'mcp_run_shell_command' in content_str or
+                            'mcp_get_current_time' in content_str or
+                            ('"command"' in content_str and TOOL_SENTINEL_OPEN in current_response) or
+                            ('"timeout"' in content_str and TOOL_SENTINEL_OPEN in current_response) or
+                            ('find .' in content_str and TOOL_SENTINEL_OPEN in current_response) or
+                            # Catch split fragments
+                            content_str.strip().endswith('<TOOL') or
+                            content_str.strip().endswith('_modules.\n\n<TOOL') or
+                            content_str.strip().startswith('_') and TOOL_SENTINEL_OPEN in current_response
                         )
                     )
-
+
                     if not should_suppress:
-
-
+                        text_msg = {
+                            'type': 'text',
+                            'content': content_str
+                        }
+                        yield f"data: {json.dumps(text_msg)}\n\n"
+                        # Force task scheduling to ensure individual processing
+                        import asyncio
+                        await asyncio.sleep(0)
                     else:
                         logger.debug(f"🔍 AGENT: Suppressed tool call content from frontend")
                         # Check for tool calls and execute when model has finished generating them
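The rewritten condition replaces dozens of ad-hoc substring checks with a shorter rule: hide any fragment that looks like part of a tool-call envelope, unless the model is inside a ```diff block. A hedged sketch of that rule as a standalone predicate (marker list copied from the hunk; `TOOL_OPEN` stands in for the package's `TOOL_SENTINEL_OPEN` constant, whose actual value is assumed here):

```python
TOOL_OPEN = "<TOOL_CALL>"  # assumed placeholder for TOOL_SENTINEL_OPEN
TOOL_MARKERS = (
    '<TOOL', 'TOOL_', '</TOOL', 'SENTINEL',
    '<name>', '</name>', '<arguments>', '</arguments>',
    'mcp_run_shell_command', 'mcp_get_current_time',
)

def should_suppress_chunk(content: str, full_response: str,
                          in_diff_block: bool, inside_tool_call: bool) -> bool:
    """Return True when a streamed fragment should be hidden from the client."""
    if in_diff_block:
        return False  # never mangle ```diff blocks the model is emitting
    if inside_tool_call or TOOL_OPEN in full_response:
        return True   # once a tool call has started, hide everything until it runs
    return any(marker in content for marker in TOOL_MARKERS)

# A tool-call fragment is suppressed; ordinary prose is not.
assert should_suppress_chunk('<arguments>{"command": "pwd"}', "", False, False)
assert not should_suppress_chunk("Here is the plan:", "", False, False)
```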
@@ -2075,10 +2021,15 @@ async def stream_chunks(body):
                                 tool_blocks.append(tool_block)
                                 start_pos = tool_end + 3

-                            # Stream tool results to frontend
+                            # Stream tool results to frontend with proper message type
                             for tool_block in tool_blocks:
-                                tool_result = "
-
+                                tool_result = "\\n" + tool_block + "\\n"
+                                tool_execution_msg = {
+                                    'type': 'tool_display',
+                                    'content': tool_result,
+                                    'tool_name': 'mcp_tool'
+                                }
+                                yield f"data: {json.dumps(tool_execution_msg)}\n\n"
                             # Don't send markdown tool blocks when we're using structured tool_execution events
                             # The structured events are already handled by the frontend
                             logger.info(f"🔍 STREAM: Skipping markdown tool block (using structured events)")
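Tool output is now wrapped in a structured event rather than raw markdown, so the frontend can render it distinctly. A minimal sketch of the SSE framing assumed by these `yield` statements (the `tool_display` shape is taken from the hunk; the payload content is invented for the example):

```python
import json

def sse_event(payload: dict) -> str:
    """Format one Server-Sent Events data frame."""
    return f"data: {json.dumps(payload)}\n\n"

msg = sse_event({
    'type': 'tool_display',
    'content': '\nexample tool output\n',
    'tool_name': 'mcp_tool',
})
print(msg)  # -> data: {"type": "tool_display", "content": ..., "tool_name": "mcp_tool"}
```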
@@ -2118,8 +2069,7 @@ async def stream_chunks(body):
                         except Exception as tool_error:
                             logger.error(f"🔍 STREAM: Tool execution error: {tool_error}")
                             error_msg = f"**Tool Error:** {str(tool_error)}"
-
-                            yield f"data: {json.dumps({'ops': ops})}\n\n"
+                            yield f"data: {json.dumps({'content': error_msg})}\n\n"
                             tool_executed = True
                             tool_call_detected = False
                             pending_tool_execution = False
@@ -2146,11 +2096,15 @@ async def stream_chunks(body):
                                 tool_blocks.append(tool_block)
                                 start_pos = tool_end + 3

-                            # Stream ALL tool results to frontend
+                            # Stream ALL tool results to frontend with proper message type
                             for tool_block in tool_blocks:
-                                tool_result = "
-
-
+                                tool_result = "\\n" + tool_block + "\\n"
+                                tool_execution_msg = {
+                                    'type': 'tool_display',
+                                    'content': tool_result,
+                                    'tool_name': 'mcp_tool'
+                                }
+                                yield f"data: {json.dumps(tool_execution_msg)}\n\n"
                                 logger.info(f"🔍 STREAM: Tool result streamed: {tool_result[:50]}...")

                             # Add ALL tool results to conversation context for model continuation
@@ -2195,8 +2149,7 @@ async def stream_chunks(body):
                         except Exception as tool_error:
                             logger.error(f"🔍 STREAM: Tool execution error: {tool_error}")
                             error_msg = f"**Tool Error:** {str(tool_error)}"
-
-                            yield f"data: {json.dumps({'ops': ops})}\n\n"
+                            yield f"data: {json.dumps({'content': error_msg})}\n\n"
                             tool_executed = True

                 logger.info(f"🔍 AGENT: Finished streaming loop for iteration {iteration}")
@@ -2246,11 +2199,15 @@ async def stream_chunks(body):
                                 tool_blocks.append(tool_block)
                                 start_pos = tool_end + 3

-                            # Stream ALL tool results to frontend
+                            # Stream ALL tool results to frontend with proper message type
                             for tool_block in tool_blocks:
-                                tool_result = "
-
-
+                                tool_result = "\\n" + tool_block + "\\n"
+                                tool_execution_msg = {
+                                    'type': 'tool_display',
+                                    'content': tool_result,
+                                    'tool_name': 'mcp_tool'
+                                }
+                                yield f"data: {json.dumps(tool_execution_msg)}\n\n"
                                 logger.info(f"🔍 STREAM: Tool result streamed: {tool_result[:50]}...")

                             # Add ALL tool results to conversation context for model continuation
@@ -2336,13 +2293,79 @@ async def stream_chunks(body):
            logger.error(f"Error in agent iteration {iteration}: {str(e)}", exc_info=True)
            processed_response = current_response  # Initialize before use

+            # Handle timeout errors with retry logic
+            error_str = str(e)
+            is_timeout_error = ("Read timeout" in error_str or
+                                "ReadTimeoutError" in error_str or
+                                "timeout" in error_str.lower())

            # Check for token-based throttling specifically
-            error_str = str(e)
            is_token_throttling = ("Too many tokens" in error_str and
                                   "ThrottlingException" in error_str and
                                   "reached max retries" in error_str)

+            # Use two-tier retry: first within stream, then new stream
+            if (is_timeout_error or is_token_throttling):
+                # Tier 1: Quick retries within same stream
+                if within_stream_retries < max_within_stream_retries:
+                    within_stream_retries += 1
+                    wait_time = min(2 ** within_stream_retries, 8)  # 2s, 4s, 8s
+                    error_type = "timeout" if is_timeout_error else "token throttling"
+
+                    logger.info(f"🔄 WITHIN-STREAM: {error_type} retry {within_stream_retries}/{max_within_stream_retries} in {wait_time}s")
+
+                    retry_msg = f"\\n🔄 {error_type.title()} detected, retrying in {wait_time}s...\\n"
+                    yield f"data: {json.dumps({'content': retry_msg})}\n\n"
+
+                    await asyncio.sleep(wait_time)
+
+                    # Retry same iteration within stream
+                    iteration -= 1
+                    if iteration < 1:
+                        iteration = 1
+                    continue
+
+                # Tier 2: Fresh connection/new stream
+                elif token_throttling_retries < max_token_throttling_retries:
+                    token_throttling_retries += 1
+                    within_stream_retries = 0  # Reset within-stream counter
+                    wait_time = min(10 * (2 ** (token_throttling_retries - 1)), 30)  # 10s, 20s, 30s
+                    error_type = "timeout" if is_timeout_error else "token throttling"
+
+                    logger.info(f"🔄 NEW-STREAM: {error_type} retry {token_throttling_retries}/{max_token_throttling_retries} with fresh connection in {wait_time}s")
+
+                    fresh_conn_msg = f"\\n🔄 Starting fresh connection... (attempt {token_throttling_retries}/{max_token_throttling_retries})\\n"
+                    yield f"data: {json.dumps({'content': fresh_conn_msg})}\n\n"
+
+                    await asyncio.sleep(wait_time)
+
+                    # End current stream and trigger new one via recursive call
+                    yield f"data: {json.dumps({'retry_with_fresh_stream': True})}\n\n"
+
+                    # Start completely new stream
+                    async for chunk in stream_chunks(body):
+                        yield chunk
+                    return
+
+                # Gracefully close stream with error message
+                if is_timeout_error:
+                    error_msg = "⚠️ Request timed out. The response may be incomplete."
+                elif is_token_throttling:
+                    error_msg = "⚠️ Rate limit exceeded. Please try again in a moment."
+                else:
+                    error_msg = f"⚠️ An error occurred: {str(e)}"
+
+                # Send error to client
+                error_content = f"\n\n{error_msg}\\n"
+                yield f"data: {json.dumps({'content': error_content})}\n\n"
+
+                # Send completion signal
+                yield f"data: {json.dumps({'done': True})}\n\n"
+
+                # Clean up and exit gracefully
+                await cleanup_stream(conversation_id)
+                return
+
            if is_token_throttling and token_throttling_retries < max_token_throttling_retries:
                token_throttling_retries += 1
                logger.info(f"🔄 TOKEN_THROTTLING: Detected token throttling in multi-round session, attempt {token_throttling_retries}/{max_token_throttling_retries}")
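The added handler retries in two tiers before giving up: quick in-stream retries with a small exponential backoff, then fresh streams with a longer, capped backoff. A sketch of just the wait schedule (cap values taken from the hunk; the retry counters themselves live in the surrounding `stream_chunks` scope):

```python
def within_stream_wait(attempt: int) -> int:
    """Tier 1: quick in-place retries -> 2s, 4s, 8s (capped at 8s)."""
    return min(2 ** attempt, 8)

def fresh_stream_wait(attempt: int) -> int:
    """Tier 2: reconnect with a new stream -> 10s, 20s, 30s (capped at 30s)."""
    return min(10 * (2 ** (attempt - 1)), 30)

print([within_stream_wait(a) for a in (1, 2, 3)])  # [2, 4, 8]
print([fresh_stream_wait(a) for a in (1, 2, 3)])   # [10, 20, 30]
```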
@@ -2354,8 +2377,8 @@ async def stream_chunks(body):
                    "retry_attempt": token_throttling_retries,
                    "wait_time": 20
                }
-
-                yield f"data: {json.dumps({'
+                final_retry_msg = f"\\n🔄 Retrying with fresh connection... (attempt {token_throttling_retries}/{max_token_throttling_retries})\\n"
+                yield f"data: {json.dumps({'content': final_retry_msg})}\n\n"

                # Wait 20 seconds and retry with fresh connection
                await asyncio.sleep(20)
@@ -2372,12 +2395,11 @@ async def stream_chunks(body):
            logger.debug(f"PARTIAL RESPONSE PRESERVED (AGENT ERROR):\n{current_response}")

            # Send the partial content to the frontend
-
-            yield f"data: {json.dumps({'ops': ops})}\n\n"
+            yield f"data: {json.dumps({'content': current_response})}\n\n"

            # Send warning about partial response
-
-            yield f"data: {json.dumps({'
+            warning_msg = f"Server encountered an error after generating {len(current_response)} characters. The partial response has been preserved."
+            yield f"data: {json.dumps({'warning': warning_msg})}\n\n"

            full_response = current_response  # Ensure it's preserved in full_response

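On an unrecoverable mid-stream error, the partial text is now flushed as an ordinary `content` event followed by a separate `warning` event, replacing the old `ops` payload. A small sketch of that sequence (field names from the hunk; the generator below is illustrative only):

```python
import json

def partial_response_events(partial_text: str):
    """Yield SSE frames that preserve already-generated text plus a warning."""
    yield f"data: {json.dumps({'content': partial_text})}\n\n"
    warning = (f"Server encountered an error after generating "
               f"{len(partial_text)} characters. The partial response has been preserved.")
    yield f"data: {json.dumps({'warning': warning})}\n\n"

for frame in partial_response_events("partial answer..."):
    print(frame, end="")
```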
@@ -2456,118 +2478,8 @@ async def stream_chunks(body):
    await cleanup_stream(conversation_id)

# Override the stream endpoint with our error handling
-
-
-    """Stream endpoint with centralized error handling."""
-    logger.info(f"🔍 STREAM_ENDPOINT: Direct /ziya/stream called - this should be using stream_chunks")
-    logger.info(f"🔍 STREAM_ENDPOINT: Request body keys: {body.keys()}")
-
-    # Check for direct streaming mode
-    import os
-    use_direct_streaming = os.getenv('ZIYA_USE_DIRECT_STREAMING', 'true').lower() == 'true'
-    logger.info(f"🚀 DIRECT_STREAMING: stream_endpoint check = {use_direct_streaming}")
-
-    if use_direct_streaming:
-        logger.info("🚀 DIRECT_STREAMING: Using direct streaming in stream_endpoint")
-        return StreamingResponse(
-            stream_chunks(body),
-            media_type="text/event-stream",
-            headers={
-                "Cache-Control": "no-cache",
-                "Connection": "keep-alive",
-                "X-Accel-Buffering": "no",
-                "Access-Control-Allow-Origin": "*",
-                "Access-Control-Allow-Methods": "POST, OPTIONS",
-                "Access-Control-Allow-Headers": "Content-Type"
-            }
-        )
-
-    try:
-        # Debug logging
-        logger.info("[INSTRUMENTATION] /ziya/stream received request")
-        logger.info(f"[INSTRUMENTATION] /ziya/stream question: '{body.get('question', 'EMPTY')[:50]}...' (truncated)")
-        logger.info(f"[INSTRUMENTATION] /ziya/stream chat_history length: {len(body.get('chat_history', []))}")
-        logger.info(f"[INSTRUMENTATION] /ziya/stream files count: {len(body.get('config', {}).get('files', []))}")
-
-        # Log body structure
-        logger.info(f"[INSTRUMENTATION] /ziya/stream body keys: {body.keys() if isinstance(body, dict) else type(body)}")
-
-        # Log chat history structure if present
-        chat_history = body.get('chat_history', [])
-        if chat_history and len(chat_history) > 0:
-            logger.info(f"[INSTRUMENTATION] /ziya/stream first history item type: {type(chat_history[0])}")
-            if isinstance(chat_history[0], list) and len(chat_history[0]) >= 2:
-                logger.info(f"[INSTRUMENTATION] /ziya/stream first history format: ['{chat_history[0][0][:20]}...', '{chat_history[0][1][:20]}...'] (truncated)")
-            elif isinstance(chat_history[0], dict):
-                logger.info(f"[INSTRUMENTATION] /ziya/stream first history keys: {chat_history[0].keys()}")
-
-        # Check if the question is empty or missing
-        if not body.get("question") or not body.get("question").strip():
-            logger.warning("[INSTRUMENTATION] /ziya/stream empty question detected")
-            raise ValidationError("Please provide a question to continue.")
-
-        # Clean chat history if present
-        if "chat_history" in body:
-            logger.info(f"[INSTRUMENTATION] /ziya/stream cleaning chat history of length {len(chat_history)}")
-            cleaned_history = []
-            for pair in body["chat_history"]:
-                try:
-                    # Handle both tuple format [role, content] and dict format {"type": role, "content": content}
-                    if isinstance(pair, dict) and 'type' in pair and 'content' in pair:
-                        role, content = pair['type'], pair['content']
-                    elif isinstance(pair, (list, tuple)) and len(pair) == 2:
-                        role, content = pair[0], pair[1]
-                    else:
-                        logger.warning(f"[INSTRUMENTATION] /ziya/stream invalid chat history pair format: {type(pair)}")
-                        continue
-
-                    if not isinstance(role, str) or not isinstance(content, str):
-                        logger.warning(f"[INSTRUMENTATION] /ziya/stream non-string message: role={type(role)}, content={type(content)}")
-                        continue
-
-                    if role.strip() and content.strip():
-                        cleaned_history.append((role.strip(), content.strip()))
-                        logger.info(f"[INSTRUMENTATION] /ziya/stream added valid message: role='{role}', content='{content[:20]}...' (truncated)")
-                    else:
-                        logger.warning(f"[INSTRUMENTATION] /ziya/stream empty message content")
-                except Exception as e:
-                    logger.error(f"[INSTRUMENTATION] /ziya/stream error processing chat history item: {str(e)}")
-
-            logger.info(f"[INSTRUMENTATION] /ziya/stream cleaned chat history from {len(body['chat_history'])} to {len(cleaned_history)} pairs")
-            body["chat_history"] = cleaned_history
-
-        logger.info("[INSTRUMENTATION] /ziya/stream starting stream endpoint with body size: %d", len(str(body)))
-
-        # Convert to ChatPromptValue if needed
-        if isinstance(body, dict) and "messages" in body:
-            logger.info(f"[INSTRUMENTATION] /ziya/stream converting {len(body['messages'])} messages to ChatPromptValue")
-            from langchain_core.prompt_values import ChatPromptValue
-            from langchain_core.messages import HumanMessage
-            messages = [HumanMessage(content=msg) for msg in body["messages"]]
-            prompt_value = ChatPromptValue(messages=messages)
-            # Keep body as dict but store the prompt value for later use if needed
-            logger.info(f"[INSTRUMENTATION] /ziya/stream created ChatPromptValue with {len(messages)} messages")
-
-        # Return the streaming response
-        logger.info("[INSTRUMENTATION] /ziya/stream calling stream_chunks()")
-        return StreamingResponse(
-            stream_chunks(body),
-            media_type="text/event-stream",
-            headers={
-                "Cache-Control": "no-cache",
-                "Connection": "keep-alive",
-                "X-Accel-Buffering": "no",
-                "Access-Control-Allow-Origin": "*",
-                "Access-Control-Allow-Methods": "POST, OPTIONS",
-                "Access-Control-Allow-Headers": "Content-Type"
-            }
-        )
-    except Exception as e:
-        # Handle any exceptions using the centralized error handler
-        logger.error(f"Exception in stream_endpoint: {str(e)}")
-        return handle_request_exception(request, e)
-
-async def stream_agent_response(body, request):
+# DISABLED: Manual /ziya/stream endpoint conflicts with /api/chat
+# @app.post("/ziya/stream")
    """Stream the agent's response with centralized error handling."""
    try:
        first_chunk = True
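The removed block registered a second SSE endpoint that wrapped the same `stream_chunks` generator in a `StreamingResponse`; 0.3.2 disables it so only `/api/chat` serves the stream. For reference, a stripped-down sketch of that wiring pattern (route path, app object, and generator name here are placeholders, not ziya routes):

```python
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

async def demo_chunks(body: dict):
    # Minimal stand-in for a chunk generator; emits one SSE data frame.
    yield 'data: {"content": "hello"}\n\n'

@app.post("/demo/stream")  # hypothetical path, not a ziya route
async def demo_stream(body: dict):
    return StreamingResponse(
        demo_chunks(body),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",  # disable proxy buffering so frames flush immediately
        },
    )
```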
@@ -2761,28 +2673,30 @@ async def get_folder(request: FolderRequest):
        return {"error": str(e)}

# Import scan progress from directory_util
-from app.utils.directory_util import get_scan_progress, cancel_scan, _scan_progress
+# from app.utils.directory_util import get_scan_progress, cancel_scan, _scan_progress

@app.get("/folder-progress")
async def get_folder_progress():
    """Get current folder scanning progress."""
-    progress = get_scan_progress()
+    # progress = get_scan_progress()
    # Only return active=True if there's actual progress to report
-    if progress["active"] and not progress["progress"]:
-
-
-
-    return progress
+    # if progress["active"] and not progress["progress"]:
+    #     # No actual progress data, don't report as active
+    #     progress["active"] = False
+    #     progress["progress"] = {}
+    # return progress
+    return {"active": False, "progress": {}}

@app.post("/folder-cancel")
async def cancel_folder_scan():
    """Cancel current folder scanning operation."""
-    was_active = cancel_scan()
-    if was_active:
-
-
-
-
+    # was_active = cancel_scan()
+    # if was_active:
+    #     logger.info("Folder scan cancellation requested")
+    logger.info("Folder scan cancellation not available")
+    return {"cancelled": False}
+
+@app.post("/file")
async def get_file(request: FileRequest):
    """Get the content of a file."""
    try:
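With the `directory_util` imports commented out, both folder-scan endpoints now return fixed payloads. A client-side usage sketch (host, port, and HTTP client are placeholders; the responses match the stubbed returns above):

```python
import httpx  # any HTTP client works; httpx is assumed here

BASE = "http://localhost:8000"  # placeholder host/port, not ziya's actual default

progress = httpx.get(f"{BASE}/folder-progress").json()
if not progress.get("active"):
    print("No folder scan in progress")  # always the case after this change

cancelled = httpx.post(f"{BASE}/folder-cancel").json()
print(cancelled)  # {"cancelled": False}
```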
@@ -3569,13 +3483,10 @@ async def set_model(request: SetModelRequest):
        logger.error(f"Failed to create agent: {str(agent_error)}", exc_info=True)
        raise agent_error

-        #
-
-
-
-    except Exception as langserve_error:
-        logger.error(f"Failed to initialize langserve: {str(langserve_error)}", exc_info=True)
-        raise langserve_error
+    # COMPLETELY DISABLED: LangServe routes cause duplicate execution with /api/chat
+    # initialize_langserve(app, agent_executor)
+    # _langserve_initialized = True
+    logger.info("LangServe completely disabled to prevent duplicate execution - using /api/chat only")

    # Force garbage collection after successful model change
    import gc