xgae 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xgae might be problematic.
- xgae/engine/responser/xga_non_stream_responser.py +213 -0
- xgae/engine/responser/xga_responser_base.py +751 -0
- xgae/engine/responser/xga_stream_responser.py +787 -0
- xgae/engine/xga_base.py +23 -9
- xgae/engine/xga_engine.py +228 -55
- xgae/engine/xga_mcp_tool_box.py +8 -3
- xgae/engine/xga_prompt_builder.py +27 -45
- xgae/utils/json_helpers.py +174 -0
- xgae/utils/llm_client.py +17 -6
- xgae/utils/setup_env.py +1 -31
- xgae/utils/utils.py +42 -0
- xgae/utils/xml_tool_parser.py +236 -0
- {xgae-0.1.2.dist-info → xgae-0.1.4.dist-info}/METADATA +1 -1
- xgae-0.1.4.dist-info/RECORD +16 -0
- xgae/engine/responser/xga_responser_utils.py +0 -0
- xgae/engine/responser/xga_stream_reponser.py +0 -0
- xgae-0.1.2.dist-info/RECORD +0 -13
- {xgae-0.1.2.dist-info → xgae-0.1.4.dist-info}/WHEEL +0 -0
@@ -0,0 +1,787 @@
+import asyncio
+import json
+import logging
+import uuid
+from datetime import datetime, timezone
+from typing import List, Dict, Any, Optional, AsyncGenerator, override
+
+from xgae.engine.responser.xga_responser_base import TaskResponseProcessor, ProcessorConfig, TaskResponseContext,TaskRunContinuousState
+from xgae.utils.json_helpers import (
+    ensure_dict, safe_json_parse,
+    to_json_string, format_for_yield
+)
+
+class StreamTaskResponser(TaskResponseProcessor):
+    def __init__(self, response_context: TaskResponseContext):
+        super().__init__(response_context)
+
+    @override
+    async def process_response(
+            self,
+            llm_response: AsyncGenerator,
+            prompt_messages: List[Dict[str, Any]],
+            continuous_state: Optional[TaskRunContinuousState] = None,
+    ) -> AsyncGenerator[Dict[str, Any], None]:
+        """Process a streaming LLM response, handling tool calls and execution.
+
+        Args:
+            llm_response: Streaming response from the LLM
+            thread_id: ID of the conversation thread
+            prompt_messages: List of messages sent to the LLM (the prompt)
+            llm_model: The name of the LLM model used
+            config: Configuration for parsing and execution
+            can_auto_continue: Whether auto-continue is enabled
+            auto_continue_count: Number of auto-continue cycles
+            continuous_state: Previous state of the conversation
+
+        Yields:
+            Complete message objects matching the DB schema, except for content chunks.
+        """
+        # Initialize from continuous state if provided (for auto-continue)
+        can_auto_continue = continuous_state.get("auto_continue", False)
+        auto_continue_count = continuous_state.get("auto_continue_count", 0)
+        llm_model = self.response_context.get("model_name")
+        config: ProcessorConfig = ProcessorConfig()
+        thread_id = self.response_context.get("task_id")
+
+        continuous_state = continuous_state or {}
+        accumulated_content = continuous_state.get('accumulated_content', "")
+        tool_calls_buffer = {}
+        current_xml_content = accumulated_content # equal to accumulated_content if auto-continuing, else blank
+        xml_chunks_buffer = []
+        pending_tool_executions = []
+        yielded_tool_indices = set() # Stores indices of tools whose *status* has been yielded
+        tool_index = 0
+        xml_tool_call_count = 0
+        finish_reason = None
+        should_auto_continue = False
+        last_assistant_message_object = None # Store the final saved assistant message object
+        tool_result_message_objects = {} # tool_index -> full saved message object
+        has_printed_thinking_prefix = False # Flag for printing thinking prefix only once
+        agent_should_terminate = False # Flag to track if a terminating tool has been executed
+        complete_native_tool_calls = [] # Initialize early for use in assistant_response_end
+
+        # Collect metadata for reconstructing LiteLLM response object
+        streaming_metadata = {
+            "model": llm_model,
+            "created": None,
+            "usage": {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0
+            },
+            "response_ms": None,
+            "first_chunk_time": None,
+            "last_chunk_time": None
+        }
+
+        logging.info(f"Streaming Config: XML={config.xml_tool_calling}, Native={config.native_tool_calling}, "
+                     f"Execute on stream={config.execute_on_stream}, Strategy={config.tool_execution_strategy}")
+
+        # Reuse thread_run_id for auto-continue or create new one
+        thread_run_id = continuous_state.get('thread_run_id') or str(uuid.uuid4())
+        continuous_state['thread_run_id'] = thread_run_id
+
+        try:
+            # --- Save and Yield Start Events (only if not auto-continuing) ---
+            if auto_continue_count == 0:
+                start_content = {"status_type": "thread_run_start", "thread_run_id": thread_run_id}
+                start_msg_obj = await self.add_message(
+                    type="status", content=start_content,
+                    is_llm_message=False, metadata={"thread_run_id": thread_run_id}
+                )
+                if start_msg_obj: yield format_for_yield(start_msg_obj)
+
+                assist_start_content = {"status_type": "assistant_response_start"}
+                assist_start_msg_obj = await self.add_message(
+                    type="status", content=assist_start_content,
+                    is_llm_message=False, metadata={"thread_run_id": thread_run_id}
+                )
+                if assist_start_msg_obj: yield format_for_yield(assist_start_msg_obj)
+            # --- End Start Events ---
+
+            __sequence = continuous_state.get('sequence', 0) # get the sequence from the previous auto-continue cycle
+
+            async for chunk in llm_response:
+                # Extract streaming metadata from chunks
+                current_time = datetime.now(timezone.utc).timestamp()
+                if streaming_metadata["first_chunk_time"] is None:
+                    streaming_metadata["first_chunk_time"] = current_time
+                streaming_metadata["last_chunk_time"] = current_time
+
+                # Extract metadata from chunk attributes
+                if hasattr(chunk, 'created') and chunk.created:
+                    streaming_metadata["created"] = chunk.created
+                if hasattr(chunk, 'model') and chunk.model:
+                    streaming_metadata["model"] = chunk.model
+                if hasattr(chunk, 'usage') and chunk.usage:
+                    # Update usage information if available (including zero values)
+                    if hasattr(chunk.usage, 'prompt_tokens') and chunk.usage.prompt_tokens is not None:
+                        streaming_metadata["usage"]["prompt_tokens"] = chunk.usage.prompt_tokens
+                    if hasattr(chunk.usage, 'completion_tokens') and chunk.usage.completion_tokens is not None:
+                        streaming_metadata["usage"]["completion_tokens"] = chunk.usage.completion_tokens
+                    if hasattr(chunk.usage, 'total_tokens') and chunk.usage.total_tokens is not None:
+                        streaming_metadata["usage"]["total_tokens"] = chunk.usage.total_tokens
+
+                if hasattr(chunk, 'choices') and chunk.choices and hasattr(chunk.choices[0], 'finish_reason') and \
+                        chunk.choices[0].finish_reason:
+                    finish_reason = chunk.choices[0].finish_reason
+                    logging.debug(f"Detected finish_reason: {finish_reason}")
+
+                if hasattr(chunk, 'choices') and chunk.choices:
+                    delta = chunk.choices[0].delta if hasattr(chunk.choices[0], 'delta') else None
+
+                    # Check for and log Anthropic thinking content
+                    if delta and hasattr(delta, 'reasoning_content') and delta.reasoning_content:
+                        if not has_printed_thinking_prefix:
+                            # print("[THINKING]: ", end='', flush=True)
+                            has_printed_thinking_prefix = True
+                        # print(delta.reasoning_content, end='', flush=True)
+                        # Append reasoning to main content to be saved in the final message
+                        accumulated_content += delta.reasoning_content
+
+                    # Process content chunk
+                    if delta and hasattr(delta, 'content') and delta.content:
+                        chunk_content = delta.content
+                        # print(chunk_content, end='', flush=True)
+                        accumulated_content += chunk_content
+                        current_xml_content += chunk_content
+
+                        if not (config.max_xml_tool_calls > 0 and xml_tool_call_count >= config.max_xml_tool_calls):
+                            # Yield ONLY content chunk (don't save)
+                            now_chunk = datetime.now(timezone.utc).isoformat()
+                            yield {
+                                "sequence": __sequence,
+                                "message_id": None, "thread_id": thread_id, "type": "assistant",
+                                "is_llm_message": True,
+                                "content": to_json_string({"role": "assistant", "content": chunk_content}),
+                                "metadata": to_json_string({"stream_status": "chunk", "thread_run_id": thread_run_id}),
+                                "created_at": now_chunk, "updated_at": now_chunk
+                            }
+                            __sequence += 1
+                        else:
+                            logging.info("XML tool call limit reached - not yielding more content chunks")
+                            self.trace.event(name="xml_tool_call_limit_reached", level="DEFAULT", status_message=(
+                                f"XML tool call limit reached - not yielding more content chunks"))
+
+                        # --- Process XML Tool Calls (if enabled and limit not reached) ---
+                        if config.xml_tool_calling and not (
+                                config.max_xml_tool_calls > 0 and xml_tool_call_count >= config.max_xml_tool_calls):
+                            xml_chunks = self._extract_xml_chunks(current_xml_content)
+                            for xml_chunk in xml_chunks:
+                                current_xml_content = current_xml_content.replace(xml_chunk, "", 1)
+                                xml_chunks_buffer.append(xml_chunk)
+                                result = self._parse_xml_tool_call(xml_chunk)
+                                if result:
+                                    tool_call, parsing_details = result
+                                    xml_tool_call_count += 1
+                                    current_assistant_id = last_assistant_message_object[
+                                        'message_id'] if last_assistant_message_object else None
+                                    context = self._create_tool_context(
+                                        tool_call, tool_index, current_assistant_id, parsing_details
+                                    )
+
+                                    if config.execute_tools and config.execute_on_stream:
+                                        # Save and Yield tool_started status
+                                        started_msg_obj = await self._yield_and_save_tool_started(context, thread_id,
+                                                                                                  thread_run_id)
+                                        if started_msg_obj: yield format_for_yield(started_msg_obj)
+                                        yielded_tool_indices.add(tool_index) # Mark status as yielded
+
+                                        execution_task = asyncio.create_task(self._execute_tool(tool_call))
+                                        pending_tool_executions.append({
+                                            "task": execution_task, "tool_call": tool_call,
+                                            "tool_index": tool_index, "context": context
+                                        })
+                                        tool_index += 1
+
+                                    if config.max_xml_tool_calls > 0 and xml_tool_call_count >= config.max_xml_tool_calls:
+                                        logging.debug(f"Reached XML tool call limit ({config.max_xml_tool_calls})")
+                                        finish_reason = "xml_tool_limit_reached"
+                                        break # Stop processing more XML chunks in this delta
+
+                    # --- Process Native Tool Call Chunks ---
+                    if config.native_tool_calling and delta and hasattr(delta, 'tool_calls') and delta.tool_calls:
+                        for tool_call_chunk in delta.tool_calls:
+                            # Yield Native Tool Call Chunk (transient status, not saved)
+                            # ... (safe extraction logic for tool_call_data_chunk) ...
+                            tool_call_data_chunk = {} # Placeholder for extracted data
+                            if hasattr(tool_call_chunk, 'model_dump'):
+                                tool_call_data_chunk = tool_call_chunk.model_dump()
+                            else: # Manual extraction...
+                                if hasattr(tool_call_chunk, 'id'): tool_call_data_chunk['id'] = tool_call_chunk.id
+                                if hasattr(tool_call_chunk, 'index'): tool_call_data_chunk[
+                                    'index'] = tool_call_chunk.index
+                                if hasattr(tool_call_chunk, 'type'): tool_call_data_chunk['type'] = tool_call_chunk.type
+                                if hasattr(tool_call_chunk, 'function'):
+                                    tool_call_data_chunk['function'] = {}
+                                    if hasattr(tool_call_chunk.function, 'name'): tool_call_data_chunk['function'][
+                                        'name'] = tool_call_chunk.function.name
+                                    if hasattr(tool_call_chunk.function, 'arguments'): tool_call_data_chunk['function'][
+                                        'arguments'] = tool_call_chunk.function.arguments if isinstance(
+                                        tool_call_chunk.function.arguments, str) else to_json_string(
+                                        tool_call_chunk.function.arguments)
+
+                            now_tool_chunk = datetime.now(timezone.utc).isoformat()
+                            yield {
+                                "message_id": None, "thread_id": thread_id, "type": "status", "is_llm_message": True,
+                                "content": to_json_string({"role": "assistant", "status_type": "tool_call_chunk",
+                                                           "tool_call_chunk": tool_call_data_chunk}),
+                                "metadata": to_json_string({"thread_run_id": thread_run_id}),
+                                "created_at": now_tool_chunk, "updated_at": now_tool_chunk
+                            }
+
+                            # --- Buffer and Execute Complete Native Tool Calls ---
+                            if not hasattr(tool_call_chunk, 'function'): continue
+                            idx = tool_call_chunk.index if hasattr(tool_call_chunk, 'index') else 0
+                            # ... (buffer update logic remains same) ...
+                            # ... (check complete logic remains same) ...
+                            has_complete_tool_call = False # Placeholder
+                            if (tool_calls_buffer.get(idx) and
+                                    tool_calls_buffer[idx]['id'] and
+                                    tool_calls_buffer[idx]['function']['name'] and
+                                    tool_calls_buffer[idx]['function']['arguments']):
+                                try:
+                                    safe_json_parse(tool_calls_buffer[idx]['function']['arguments'])
+                                    has_complete_tool_call = True
+                                except json.JSONDecodeError:
+                                    pass
+
+                            if has_complete_tool_call and config.execute_tools and config.execute_on_stream:
+                                current_tool = tool_calls_buffer[idx]
+                                tool_call_data = {
+                                    "function_name": current_tool['function']['name'],
+                                    "arguments": safe_json_parse(current_tool['function']['arguments']),
+                                    "id": current_tool['id']
+                                }
+                                current_assistant_id = last_assistant_message_object[
+                                    'message_id'] if last_assistant_message_object else None
+                                context = self._create_tool_context(
+                                    tool_call_data, tool_index, current_assistant_id
+                                )
+
+                                # Save and Yield tool_started status
+                                started_msg_obj = await self._yield_and_save_tool_started(context, thread_id,
+                                                                                          thread_run_id)
+                                if started_msg_obj: yield format_for_yield(started_msg_obj)
+                                yielded_tool_indices.add(tool_index) # Mark status as yielded
+
+                                execution_task = asyncio.create_task(self._execute_tool(tool_call_data))
+                                pending_tool_executions.append({
+                                    "task": execution_task, "tool_call": tool_call_data,
+                                    "tool_index": tool_index, "context": context
+                                })
+                                tool_index += 1
+
+                if finish_reason == "xml_tool_limit_reached":
+                    logging.info("Stopping stream processing after loop due to XML tool call limit")
+                    self.trace.event(name="stopping_stream_processing_after_loop_due_to_xml_tool_call_limit",
+                                     level="DEFAULT", status_message=(
+                            f"Stopping stream processing after loop due to XML tool call limit"))
+                    break
+
+            # print() # Add a final newline after the streaming loop finishes
+
+            # --- After Streaming Loop ---
+
+            if (
+                    streaming_metadata["usage"]["total_tokens"] == 0
+            ):
+                logging.info("🔥 No usage data from provider, counting with litellm.token_counter")
+
+                try:
+                    # prompt side
+                    # prompt_tokens = token_counter(
+                    #     model=llm_model,
+                    #     messages=prompt_messages # chat or plain; token_counter handles both
+                    # )
+                    #
+                    # # completion side
+                    # completion_tokens = token_counter(
+                    #     model=llm_model,
+                    #     text=accumulated_content or "" # empty string safe
+                    # )
+
+                    # streaming_metadata["usage"]["prompt_tokens"] = prompt_tokens
+                    # streaming_metadata["usage"]["completion_tokens"] = completion_tokens
+                    # streaming_metadata["usage"]["total_tokens"] = prompt_tokens + completion_tokens
+                    #
+                    # logging.info(
+                    #     f"🔥 Estimated tokens – prompt: {prompt_tokens}, "
+                    #     f"completion: {completion_tokens}, total: {prompt_tokens + completion_tokens}"
+                    # )
+                    self.trace.event(name="usage_calculated_with_litellm_token_counter", level="DEFAULT",
+                                     status_message=(f"Usage calculated with litellm.token_counter"))
+                except Exception as e:
+                    logging.warning(f"Failed to calculate usage: {str(e)}")
+                    self.trace.event(name="failed_to_calculate_usage", level="WARNING",
+                                     status_message=(f"Failed to calculate usage: {str(e)}"))
+
+            # Wait for pending tool executions from streaming phase
+            tool_results_buffer = [] # Stores (tool_call, result, tool_index, context)
+            if pending_tool_executions:
+                logging.info(f"Waiting for {len(pending_tool_executions)} pending streamed tool executions")
+                self.trace.event(name="waiting_for_pending_streamed_tool_executions", level="DEFAULT", status_message=(
+                    f"Waiting for {len(pending_tool_executions)} pending streamed tool executions"))
+                # ... (asyncio.wait logic) ...
+                pending_tasks = [execution["task"] for execution in pending_tool_executions]
+                done, _ = await asyncio.wait(pending_tasks)
+
+                for execution in pending_tool_executions:
+                    tool_idx = execution.get("tool_index", -1)
+                    context = execution["context"]
+                    tool_name = context.function_name
+
+                    # Check if status was already yielded during stream run
+                    if tool_idx in yielded_tool_indices:
+                        logging.debug(f"Status for tool index {tool_idx} already yielded.")
+                        # Still need to process the result for the buffer
+                        try:
+                            if execution["task"].done():
+                                result = execution["task"].result()
+                                context.result = result
+                                tool_results_buffer.append((execution["tool_call"], result, tool_idx, context))
+
+                                if tool_name in ['ask', 'complete']:
+                                    logging.info(
+                                        f"Terminating tool '{tool_name}' completed during streaming. Setting termination flag.")
+                                    self.trace.event(name="terminating_tool_completed_during_streaming",
+                                                     level="DEFAULT", status_message=(
+                                            f"Terminating tool '{tool_name}' completed during streaming. Setting termination flag."))
+                                    agent_should_terminate = True
+
+                            else: # Should not happen with asyncio.wait
+                                logging.warning(f"Task for tool index {tool_idx} not done after wait.")
+                                self.trace.event(name="task_for_tool_index_not_done_after_wait", level="WARNING",
+                                                 status_message=(
+                                                     f"Task for tool index {tool_idx} not done after wait."))
+                        except Exception as e:
+                            logging.error(f"Error getting result for pending tool execution {tool_idx}: {str(e)}")
+                            self.trace.event(name="error_getting_result_for_pending_tool_execution", level="ERROR",
+                                             status_message=(
+                                                 f"Error getting result for pending tool execution {tool_idx}: {str(e)}"))
+                            context.error = e
+                            # Save and Yield tool error status message (even if started was yielded)
+                            error_msg_obj = await self._yield_and_save_tool_error(context, thread_id, thread_run_id)
+                            if error_msg_obj: yield format_for_yield(error_msg_obj)
+                        continue # Skip further status yielding for this tool index
+
+                    # If status wasn't yielded before (shouldn't happen with current logic), yield it now
+                    try:
+                        if execution["task"].done():
+                            result = execution["task"].result()
+                            context.result = result
+                            tool_results_buffer.append((execution["tool_call"], result, tool_idx, context))
+
+                            # Check if this is a terminating tool
+                            if tool_name in ['ask', 'complete']:
+                                logging.info(
+                                    f"Terminating tool '{tool_name}' completed during streaming. Setting termination flag.")
+                                self.trace.event(name="terminating_tool_completed_during_streaming", level="DEFAULT",
+                                                 status_message=(
+                                                     f"Terminating tool '{tool_name}' completed during streaming. Setting termination flag."))
+                                agent_should_terminate = True
+
+                            # Save and Yield tool completed/failed status
+                            completed_msg_obj = await self._yield_and_save_tool_completed(
+                                context, None, thread_id, thread_run_id
+                            )
+                            if completed_msg_obj: yield format_for_yield(completed_msg_obj)
+                            yielded_tool_indices.add(tool_idx)
+                    except Exception as e:
+                        logging.error(
+                            f"Error getting result/yielding status for pending tool execution {tool_idx}: {str(e)}")
+                        self.trace.event(name="error_getting_result_yielding_status_for_pending_tool_execution",
+                                         level="ERROR", status_message=(
+                                f"Error getting result/yielding status for pending tool execution {tool_idx}: {str(e)}"))
+                        context.error = e
+                        # Save and Yield tool error status
+                        error_msg_obj = await self._yield_and_save_tool_error(context, thread_id, thread_run_id)
+                        if error_msg_obj: yield format_for_yield(error_msg_obj)
+                        yielded_tool_indices.add(tool_idx)
+
+            # Save and yield finish status if limit was reached
+            if finish_reason == "xml_tool_limit_reached":
+                finish_content = {"status_type": "finish", "finish_reason": "xml_tool_limit_reached"}
+                finish_msg_obj = await self.add_message(
+                    type="status", content=finish_content,
+                    is_llm_message=False, metadata={"thread_run_id": thread_run_id}
+                )
+                if finish_msg_obj: yield format_for_yield(finish_msg_obj)
+                logging.info(
+                    f"Stream finished with reason: xml_tool_limit_reached after {xml_tool_call_count} XML tool calls")
+                self.trace.event(name="stream_finished_with_reason_xml_tool_limit_reached_after_xml_tool_calls",
+                                 level="DEFAULT", status_message=(
+                        f"Stream finished with reason: xml_tool_limit_reached after {xml_tool_call_count} XML tool calls"))
+
+            # Calculate if auto-continue is needed if the finish reason is length
+            should_auto_continue = (can_auto_continue and finish_reason == 'length')
+
+            # --- SAVE and YIELD Final Assistant Message ---
+            # Only save assistant message if NOT auto-continuing due to length to avoid duplicate messages
+            if accumulated_content and not should_auto_continue:
+                # ... (Truncate accumulated_content logic) ...
+                if config.max_xml_tool_calls > 0 and xml_tool_call_count >= config.max_xml_tool_calls and xml_chunks_buffer:
+                    last_xml_chunk = xml_chunks_buffer[-1]
+                    last_chunk_end_pos = accumulated_content.find(last_xml_chunk) + len(last_xml_chunk)
+                    if last_chunk_end_pos > 0:
+                        accumulated_content = accumulated_content[:last_chunk_end_pos]
+
+                # ... (Extract complete_native_tool_calls logic) ...
+                # Update complete_native_tool_calls from buffer (initialized earlier)
+                if config.native_tool_calling:
+                    for idx, tc_buf in tool_calls_buffer.items():
+                        if tc_buf['id'] and tc_buf['function']['name'] and tc_buf['function']['arguments']:
+                            try:
+                                args = safe_json_parse(tc_buf['function']['arguments'])
+                                complete_native_tool_calls.append({
+                                    "id": tc_buf['id'], "type": "function",
+                                    "function": {"name": tc_buf['function']['name'], "arguments": args}
+                                })
+                            except json.JSONDecodeError:
+                                continue
+
+                message_data = { # Dict to be saved in 'content'
+                    "role": "assistant", "content": accumulated_content,
+                    "tool_calls": complete_native_tool_calls or None
+                }
+
+                last_assistant_message_object = await self._add_message_with_agent_info(type="assistant", content=message_data,
+                    is_llm_message=True, metadata={"thread_run_id": thread_run_id}
+                )
+
+                if last_assistant_message_object:
+                    # Yield the complete saved object, adding stream_status metadata just for yield
+                    yield_metadata = ensure_dict(last_assistant_message_object.get('metadata'), {})
+                    yield_metadata['stream_status'] = 'complete'
+                    # Format the message for yielding
+                    yield_message = last_assistant_message_object.copy()
+                    yield_message['metadata'] = yield_metadata
+                    yield format_for_yield(yield_message)
+                else:
+                    logging.error(f"Failed to save final assistant message for thread {thread_id}")
+                    self.trace.event(name="failed_to_save_final_assistant_message_for_thread", level="ERROR",
+                                     status_message=(f"Failed to save final assistant message for thread {thread_id}"))
+                    # Save and yield an error status
+                    err_content = {"role": "system", "status_type": "error",
+                                   "message": "Failed to save final assistant message"}
+                    err_msg_obj = await self.add_message(
+                        type="status", content=err_content,
+                        is_llm_message=False, metadata={"thread_run_id": thread_run_id}
+                    )
+                    if err_msg_obj: yield format_for_yield(err_msg_obj)
+
+            # --- Process All Tool Results Now ---
+            if config.execute_tools:
+                final_tool_calls_to_process = []
+                # ... (Gather final_tool_calls_to_process from native and XML buffers) ...
+                # Gather native tool calls from buffer
+                if config.native_tool_calling and complete_native_tool_calls:
+                    for tc in complete_native_tool_calls:
+                        final_tool_calls_to_process.append({
+                            "function_name": tc["function"]["name"],
+                            "arguments": tc["function"]["arguments"], # Already parsed object
+                            "id": tc["id"]
+                        })
+                # Gather XML tool calls from buffer (up to limit)
+                parsed_xml_data = []
+                if config.xml_tool_calling:
+                    # Reparse remaining content just in case (should be empty if processed correctly)
+                    xml_chunks = self._extract_xml_chunks(current_xml_content)
+                    xml_chunks_buffer.extend(xml_chunks)
+                    # Process only chunks not already handled in the stream loop
+                    remaining_limit = config.max_xml_tool_calls - xml_tool_call_count if config.max_xml_tool_calls > 0 else len(
+                        xml_chunks_buffer)
+                    xml_chunks_to_process = xml_chunks_buffer[:remaining_limit] # Ensure limit is respected
+
+                    for chunk in xml_chunks_to_process:
+                        parsed_result = self._parse_xml_tool_call(chunk)
+                        if parsed_result:
+                            tool_call, parsing_details = parsed_result
+                            # Avoid adding if already processed during streaming
+                            if not any(exec['tool_call'] == tool_call for exec in pending_tool_executions):
+                                final_tool_calls_to_process.append(tool_call)
+                                parsed_xml_data.append({'tool_call': tool_call, 'parsing_details': parsing_details})
+
+                all_tool_data_map = {} # tool_index -> {'tool_call': ..., 'parsing_details': ...}
+                # Add native tool data
+                native_tool_index = 0
+                if config.native_tool_calling and complete_native_tool_calls:
+                    for tc in complete_native_tool_calls:
+                        # Find the corresponding entry in final_tool_calls_to_process if needed
+                        # For now, assume order matches if only native used
+                        exec_tool_call = {
+                            "function_name": tc["function"]["name"],
+                            "arguments": tc["function"]["arguments"],
+                            "id": tc["id"]
+                        }
+                        all_tool_data_map[native_tool_index] = {"tool_call": exec_tool_call, "parsing_details": None}
+                        native_tool_index += 1
+
+                # Add XML tool data
+                xml_tool_index_start = native_tool_index
+                for idx, item in enumerate(parsed_xml_data):
+                    all_tool_data_map[xml_tool_index_start + idx] = item
+
+                tool_results_map = {} # tool_index -> (tool_call, result, context)
+
+                # Populate from buffer if executed on stream
+                if config.execute_on_stream and tool_results_buffer:
+                    logging.info(f"Processing {len(tool_results_buffer)} buffered tool results")
+                    self.trace.event(name="processing_buffered_tool_results", level="DEFAULT",
+                                     status_message=(f"Processing {len(tool_results_buffer)} buffered tool results"))
+                    for tool_call, result, tool_idx, context in tool_results_buffer:
+                        if last_assistant_message_object: context.assistant_message_id = last_assistant_message_object[
+                            'message_id']
+                        tool_results_map[tool_idx] = (tool_call, result, context)
+
+                # Or execute now if not streamed
+                elif final_tool_calls_to_process and not config.execute_on_stream:
+                    logging.info(
+                        f"Executing {len(final_tool_calls_to_process)} tools ({config.tool_execution_strategy}) after stream")
+                    self.trace.event(name="executing_tools_after_stream", level="DEFAULT", status_message=(
+                        f"Executing {len(final_tool_calls_to_process)} tools ({config.tool_execution_strategy}) after stream"))
+                    results_list = await self._execute_tools(final_tool_calls_to_process,
+                                                             config.tool_execution_strategy)
+                    current_tool_idx = 0
+                    for tc, res in results_list:
+                        # Map back using all_tool_data_map which has correct indices
+                        if current_tool_idx in all_tool_data_map:
+                            tool_data = all_tool_data_map[current_tool_idx]
+                            context = self._create_tool_context(
+                                tc, current_tool_idx,
+                                last_assistant_message_object['message_id'] if last_assistant_message_object else None,
+                                tool_data.get('parsing_details')
+                            )
+                            context.result = res
+                            tool_results_map[current_tool_idx] = (tc, res, context)
+                        else:
+                            logging.warning(f"Could not map result for tool index {current_tool_idx}")
+                            self.trace.event(name="could_not_map_result_for_tool_index", level="WARNING",
+                                             status_message=(f"Could not map result for tool index {current_tool_idx}"))
+                        current_tool_idx += 1
+
+                # Save and Yield each result message
+                if tool_results_map:
+                    logging.info(f"Saving and yielding {len(tool_results_map)} final tool result messages")
+                    self.trace.event(name="saving_and_yielding_final_tool_result_messages", level="DEFAULT",
+                                     status_message=(
+                                         f"Saving and yielding {len(tool_results_map)} final tool result messages"))
+                    for tool_idx in sorted(tool_results_map.keys()):
+                        tool_call, result, context = tool_results_map[tool_idx]
+                        context.result = result
+                        if not context.assistant_message_id and last_assistant_message_object:
+                            context.assistant_message_id = last_assistant_message_object['message_id']
+
+                        # Yield start status ONLY IF executing non-streamed (already yielded if streamed)
+                        if not config.execute_on_stream and tool_idx not in yielded_tool_indices:
+                            started_msg_obj = await self._yield_and_save_tool_started(context, thread_id, thread_run_id)
+                            if started_msg_obj: yield format_for_yield(started_msg_obj)
+                            yielded_tool_indices.add(tool_idx) # Mark status yielded
+
+                        # Save the tool result message to DB
+                        saved_tool_result_object = await self._add_tool_result( # Returns full object or None
+                            thread_id, tool_call, result, config.xml_adding_strategy,
+                            context.assistant_message_id, context.parsing_details
+                        )
+
+                        # Yield completed/failed status (linked to saved result ID if available)
+                        completed_msg_obj = await self._yield_and_save_tool_completed(
+                            context,
+                            saved_tool_result_object['message_id'] if saved_tool_result_object else None,
+                            thread_id, thread_run_id
+                        )
+                        if completed_msg_obj: yield format_for_yield(completed_msg_obj)
+                        # Don't add to yielded_tool_indices here, completion status is separate yield
+
+                        # Yield the saved tool result object
+                        if saved_tool_result_object:
+                            tool_result_message_objects[tool_idx] = saved_tool_result_object
+                            yield format_for_yield(saved_tool_result_object)
+                        else:
+                            logging.error(
+                                f"Failed to save tool result for index {tool_idx}, not yielding result message.")
+                            self.trace.event(name="failed_to_save_tool_result_for_index", level="ERROR",
+                                             status_message=(
+                                                 f"Failed to save tool result for index {tool_idx}, not yielding result message."))
+                            # Optionally yield error status for saving failure?
+
+            # --- Final Finish Status ---
+            if finish_reason and finish_reason != "xml_tool_limit_reached":
+                finish_content = {"status_type": "finish", "finish_reason": finish_reason}
+                finish_msg_obj = await self.add_message(
+                    type="status", content=finish_content,
+                    is_llm_message=False, metadata={"thread_run_id": thread_run_id}
+                )
+                if finish_msg_obj: yield format_for_yield(finish_msg_obj)
+
+            # Check if agent should terminate after processing pending tools
+            if agent_should_terminate:
+                logging.info(
+                    "Agent termination requested after executing ask/complete tool. Stopping further processing.")
+                self.trace.event(name="agent_termination_requested", level="DEFAULT",
+                                 status_message="Agent termination requested after executing ask/complete tool. Stopping further processing.")
+
+                # Set finish reason to indicate termination
+                finish_reason = "agent_terminated"
+
+                # Save and yield termination status
+                finish_content = {"status_type": "finish", "finish_reason": "agent_terminated"}
+                finish_msg_obj = await self.add_message(
+                    type="status", content=finish_content,
+                    is_llm_message=False, metadata={"thread_run_id": thread_run_id}
+                )
+                if finish_msg_obj: yield format_for_yield(finish_msg_obj)
+
+                # Save assistant_response_end BEFORE terminating
+                if last_assistant_message_object:
+                    try:
+                        # Calculate response time if we have timing data
+                        if streaming_metadata["first_chunk_time"] and streaming_metadata["last_chunk_time"]:
+                            streaming_metadata["response_ms"] = (streaming_metadata["last_chunk_time"] -
+                                                                 streaming_metadata["first_chunk_time"]) * 1000
+
+                        # Create a LiteLLM-like response object for streaming (before termination)
+                        # Check if we have any actual usage data
+                        has_usage_data = (
+                            streaming_metadata["usage"]["prompt_tokens"] > 0 or
+                            streaming_metadata["usage"]["completion_tokens"] > 0 or
+                            streaming_metadata["usage"]["total_tokens"] > 0
+                        )
+
+                        assistant_end_content = {
+                            "choices": [
+                                {
+                                    "finish_reason": finish_reason or "stop",
+                                    "index": 0,
+                                    "message": {
+                                        "role": "assistant",
+                                        "content": accumulated_content,
+                                        "tool_calls": complete_native_tool_calls or None
+                                    }
+                                }
+                            ],
+                            "created": streaming_metadata.get("created"),
+                            "model": streaming_metadata.get("model", llm_model),
+                            "usage": streaming_metadata["usage"], # Always include usage like LiteLLM does
+                            "streaming": True, # Add flag to indicate this was reconstructed from streaming
+                        }
+
+                        # Only include response_ms if we have timing data
+                        if streaming_metadata.get("response_ms"):
+                            assistant_end_content["response_ms"] = streaming_metadata["response_ms"]
+
+                        await self.add_message(
+                            type="assistant_response_end",
+                            content=assistant_end_content,
+                            is_llm_message=False,
+                            metadata={"thread_run_id": thread_run_id}
+                        )
+                        logging.info("Assistant response end saved for stream (before termination)")
+                    except Exception as e:
+                        logging.error(f"Error saving assistant response end for stream (before termination): {str(e)}")
+                        self.trace.event(name="error_saving_assistant_response_end_for_stream_before_termination",
+                                         level="ERROR", status_message=(
+                                f"Error saving assistant response end for stream (before termination): {str(e)}"))
+
+                # Skip all remaining processing and go to finally block
+                return
+
+            # --- Save and Yield assistant_response_end ---
+            # Only save assistant_response_end if not auto-continuing (response is actually complete)
+            if not should_auto_continue:
+                if last_assistant_message_object: # Only save if assistant message was saved
+                    try:
+                        # Calculate response time if we have timing data
+                        if streaming_metadata["first_chunk_time"] and streaming_metadata["last_chunk_time"]:
+                            streaming_metadata["response_ms"] = (streaming_metadata["last_chunk_time"] -
+                                                                 streaming_metadata["first_chunk_time"]) * 1000
+
+                        # Create a LiteLLM-like response object for streaming
+                        # Check if we have any actual usage data
+                        has_usage_data = (
+                            streaming_metadata["usage"]["prompt_tokens"] > 0 or
+                            streaming_metadata["usage"]["completion_tokens"] > 0 or
+                            streaming_metadata["usage"]["total_tokens"] > 0
+                        )
+
+                        assistant_end_content = {
+                            "choices": [
+                                {
+                                    "finish_reason": finish_reason or "stop",
+                                    "index": 0,
+                                    "message": {
+                                        "role": "assistant",
+                                        "content": accumulated_content,
+                                        "tool_calls": complete_native_tool_calls or None
+                                    }
+                                }
+                            ],
+                            "created": streaming_metadata.get("created"),
+                            "model": streaming_metadata.get("model", llm_model),
+                            "usage": streaming_metadata["usage"], # Always include usage like LiteLLM does
+                            "streaming": True, # Add flag to indicate this was reconstructed from streaming
+                        }
+
+                        # Only include response_ms if we have timing data
+                        if streaming_metadata.get("response_ms"):
+                            assistant_end_content["response_ms"] = streaming_metadata["response_ms"]
+
+                        await self.add_message(
+                            type="assistant_response_end",
+                            content=assistant_end_content,
+                            is_llm_message=False,
+                            metadata={"thread_run_id": thread_run_id}
+                        )
+                        logging.info("Assistant response end saved for stream")
+                    except Exception as e:
+                        logging.error(f"Error saving assistant response end for stream: {str(e)}")
+                        self.trace.event(name="error_saving_assistant_response_end_for_stream", level="ERROR",
+                                         status_message=(f"Error saving assistant response end for stream: {str(e)}"))
+
+        except Exception as e:
+            logging.error(f"Error processing stream: {str(e)}", exc_info=True)
+            self.trace.event(name="error_processing_stream", level="ERROR",
+                             status_message=(f"Error processing stream: {str(e)}"))
+            # Save and yield error status message
+
+            err_content = {"role": "system", "status_type": "error", "message": str(e)}
+            if (not "AnthropicException - Overloaded" in str(e)):
+                err_msg_obj = await self.add_message(
+                    type="status", content=err_content,
+                    is_llm_message=False,
+                    metadata={"thread_run_id": thread_run_id if 'thread_run_id' in locals() else None}
+                )
+                if err_msg_obj: yield format_for_yield(err_msg_obj) # Yield the saved error message
+                # Re-raise the same exception (not a new one) to ensure proper error propagation
+                logging.critical(f"Re-raising error to stop further processing: {str(e)}")
+                self.trace.event(name="re_raising_error_to_stop_further_processing", level="ERROR",
+                                 status_message=(f"Re-raising error to stop further processing: {str(e)}"))
+            else:
+                logging.error(f"AnthropicException - Overloaded detected - Falling back to OpenRouter: {str(e)}",
+                              exc_info=True)
+                self.trace.event(name="anthropic_exception_overloaded_detected", level="ERROR", status_message=(
+                    f"AnthropicException - Overloaded detected - Falling back to OpenRouter: {str(e)}"))
+            raise # Use bare 'raise' to preserve the original exception with its traceback
+
+        finally:
+            # Update continuous state for potential auto-continue
+            if should_auto_continue:
+                continuous_state['accumulated_content'] = accumulated_content
+                continuous_state['sequence'] = __sequence
+
+                logging.info(f"Updated continuous state for auto-continue with {len(accumulated_content)} chars")
+            else:
+                # Save and Yield the final thread_run_end status (only if not auto-continuing and finish_reason is not 'length')
+                try:
+                    end_content = {"status_type": "thread_run_end"}
+                    end_msg_obj = await self.add_message(
+                        type="status", content=end_content,
+                        is_llm_message=False,
+                        metadata={"thread_run_id": thread_run_id if 'thread_run_id' in locals() else None}
+                    )
+                    if end_msg_obj: yield format_for_yield(end_msg_obj)
+                except Exception as final_e:
+                    logging.error(f"Error in finally block: {str(final_e)}", exc_info=True)
+                    self.trace.event(name="error_in_finally_block", level="ERROR",
+                                     status_message=(f"Error in finally block: {str(final_e)}"))
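
For orientation, the sketch below shows one way a caller might drive the new StreamTaskResponser added in 0.1.4. It is a minimal, hypothetical example and not part of the released package: the construction of the TaskResponseContext, the `llm_stream` object, and the `consume_stream` helper are assumptions, and it only illustrates that process_response is an async generator of message dicts whose 'content' and 'metadata' fields arrive as JSON strings for streamed chunks.

# Hypothetical consumer sketch -- not shipped in xgae 0.1.4.
# Assumes the engine has already built a TaskResponseContext and that
# `llm_stream` is an async iterator of LiteLLM-style streaming chunks.
import json

from xgae.engine.responser.xga_stream_responser import StreamTaskResponser


async def consume_stream(response_context, llm_stream, prompt_messages):
    responser = StreamTaskResponser(response_context)
    async for message in responser.process_response(
        llm_response=llm_stream,
        prompt_messages=prompt_messages,
        continuous_state={"auto_continue": False, "auto_continue_count": 0},
    ):
        # Each yielded item is a message dict; decode 'metadata' when it is
        # a JSON string (as it is for the chunk yields in the diff above).
        metadata = message.get("metadata") or {}
        if isinstance(metadata, str):
            metadata = json.loads(metadata)
        if message.get("type") == "assistant" and metadata.get("stream_status") == "chunk":
            # Streamed assistant text arrives incrementally.
            content = json.loads(message["content"])
            print(content.get("content", ""), end="", flush=True)
        elif message.get("type") == "status":
            pass  # tool_started / tool_completed / finish / thread_run_end events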