xgae 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xgae might be problematic. Click here for more details.

@@ -1,57 +1,11 @@
1
- import asyncio
2
- import json
3
1
  import logging
4
- import uuid
5
2
 
6
- from dataclasses import dataclass
7
- from datetime import datetime, timezone
8
- from typing import List, Dict, Any, Optional, AsyncGenerator, override, Literal
9
-
10
- from xgae.engine.responser.responser_base import TaskResponseProcessor, TaskResponserContext,TaskRunContinuousState,XmlAddingStrategy,ToolExecutionStrategy
11
- from xgae.utils.json_helpers import (
12
- ensure_dict, safe_json_parse,
13
- to_json_string, format_for_yield
14
- )
15
-
16
- @dataclass
17
- class ProcessorConfig:
18
- """
19
- Configuration for response processing and tool execution.
20
-
21
- This class controls how the LLM's responses are processed, including how tool calls
22
- are detected, executed, and their results handled.
23
-
24
- Attributes:
25
- xml_tool_calling: Enable XML-based tool call detection (<tool>...</tool>)
26
- native_tool_calling: Enable OpenAI-style function calling format
27
- execute_tools: Whether to automatically execute detected tool calls
28
- execute_on_stream: For streaming, execute tools as they appear vs. at the end
29
- tool_execution_strategy: How to execute multiple tools ("sequential" or "parallel")
30
- xml_adding_strategy: How to add XML tool results to the conversation
31
- max_xml_tool_calls: Maximum number of XML tool calls to process (0 = no limit)
32
- """
33
-
34
- xml_tool_calling: bool = True
35
- native_tool_calling: bool = False
36
-
37
- execute_tools: bool = True
38
- execute_on_stream: bool = False
39
- tool_execution_strategy: ToolExecutionStrategy = "sequential"
40
- xml_adding_strategy: XmlAddingStrategy = "assistant_message"
41
- max_xml_tool_calls: int = 0 # 0 means no limit
42
-
43
- def __post_init__(self):
44
- """Validate configuration after initialization."""
45
- if self.xml_tool_calling is False and self.native_tool_calling is False and self.execute_tools:
46
- raise ValueError(
47
- "At least one tool calling format (XML or native) must be enabled if execute_tools is True")
48
-
49
- if self.xml_adding_strategy not in ["user_message", "assistant_message", "inline_edit"]:
50
- raise ValueError("xml_adding_strategy must be 'user_message', 'assistant_message', or 'inline_edit'")
51
-
52
- if self.max_xml_tool_calls < 0:
53
- raise ValueError("max_xml_tool_calls must be a non-negative integer (0 = no limit)")
3
+ from typing import List, Dict, Any, Optional, AsyncGenerator, override
4
+ from importlib_metadata import metadata
54
5
 
6
+ from xgae.utils import handle_error
7
+ from xgae.utils.json_helpers import format_for_yield
8
+ from xgae.engine.responser.responser_base import TaskResponseProcessor, TaskResponserContext, TaskRunContinuousState
55
9
 
56
10
 
57
11
  class StreamTaskResponser(TaskResponseProcessor):
@@ -59,767 +13,126 @@ class StreamTaskResponser(TaskResponseProcessor):
59
13
  super().__init__(response_context)
60
14
 
61
15
  @override
62
- async def process_response(
63
- self,
64
- llm_response: AsyncGenerator,
65
- prompt_messages: List[Dict[str, Any]],
66
- continuous_state: Optional[TaskRunContinuousState] = None,
67
- ) -> AsyncGenerator[Dict[str, Any], None]:
68
- """Process a streaming LLM response, handling tool calls and execution.
69
-
70
- Args:
71
- llm_response: Streaming response from the LLM
72
- thread_id: ID of the conversation thread
73
- prompt_messages: List of messages sent to the LLM (the prompt)
74
- llm_model: The name of the LLM model used
75
- config: Configuration for parsing and execution
76
- can_auto_continue: Whether auto-continue is enabled
77
- auto_continue_count: Number of auto-continue cycles
78
- continuous_state: Previous state of the conversation
79
-
80
- Yields:
81
- Complete message objects matching the DB schema, except for content chunks.
82
- """
83
- # Initialize from continuous state if provided (for auto-continue)
84
- can_auto_continue = continuous_state.get("auto_continue", False)
85
- auto_continue_count = continuous_state.get("auto_continue_count", 0)
86
- llm_model = self.response_context.get("model_name")
87
- config: ProcessorConfig = ProcessorConfig()
88
- thread_id = self.response_context.get("task_id")
89
-
90
- continuous_state = continuous_state or {}
16
+ async def process_response(self,
17
+ llm_response: AsyncGenerator,
18
+ prompt_messages: List[Dict[str, Any]],
19
+ continuous_state: TaskRunContinuousState
20
+ ) -> AsyncGenerator[Dict[str, Any], None]:
91
21
  accumulated_content = continuous_state.get('accumulated_content', "")
92
- tool_calls_buffer = {}
93
- current_xml_content = accumulated_content # equal to accumulated_content if auto-continuing, else blank
94
- xml_chunks_buffer = []
95
- pending_tool_executions = []
96
- yielded_tool_indices = set() # Stores indices of tools whose *status* has been yielded
97
- tool_index = 0
98
- xml_tool_call_count = 0
22
+ auto_continue_count = continuous_state.get('auto_continue_count', 0)
23
+ can_auto_continue = continuous_state.get("auto_continue", False)
24
+ use_assistant_chunk_msg = self.response_context.get("use_assistant_chunk_msg")
25
+
99
26
  finish_reason = None
100
27
  should_auto_continue = False
101
- last_assistant_message_object = None # Store the final saved assistant message object
102
- tool_result_message_objects = {} # tool_index -> full saved message object
103
- has_printed_thinking_prefix = False # Flag for printing thinking prefix only once
104
- agent_should_terminate = False # Flag to track if a terminating tool has been executed
105
- complete_native_tool_calls = [] # Initialize early for use in assistant_response_end
106
-
107
- # Collect metadata for reconstructing LiteLLM response object
108
- streaming_metadata = {
109
- "model": llm_model,
110
- "created": None,
111
- "usage": {
112
- "prompt_tokens": 0,
113
- "completion_tokens": 0,
114
- "total_tokens": 0
115
- },
116
- "response_ms": None,
117
- "first_chunk_time": None,
118
- "last_chunk_time": None
119
- }
120
-
121
- logging.info(f"Streaming Config: XML={config.xml_tool_calling}, Native={config.native_tool_calling}, "
122
- f"Execute on stream={config.execute_on_stream}, Strategy={config.tool_execution_strategy}")
123
-
124
- # Reuse thread_run_id for auto-continue or create new one
125
- thread_run_id = continuous_state.get('thread_run_id') or str(uuid.uuid4())
126
- continuous_state['thread_run_id'] = thread_run_id
127
-
28
+ sequence = continuous_state.get('assistant_msg_sequence', 0)
29
+
128
30
  try:
129
- # --- Save and Yield Start Events (only if not auto-continuing) ---
130
- if auto_continue_count == 0:
131
- start_content = {"status_type": "thread_run_start", "thread_run_id": thread_run_id}
132
- start_msg_obj = await self.add_response_message(
133
- type="status", content=start_content,
134
- is_llm_message=False, metadata={"thread_run_id": thread_run_id}
135
- )
136
- if start_msg_obj: yield format_for_yield(start_msg_obj)
137
-
138
- assist_start_content = {"status_type": "assistant_response_start"}
139
- assist_start_msg_obj = await self.add_response_message(
140
- type="status", content=assist_start_content,
141
- is_llm_message=False, metadata={"thread_run_id": thread_run_id}
142
- )
143
- if assist_start_msg_obj: yield format_for_yield(assist_start_msg_obj)
144
- # --- End Start Events ---
145
-
146
- __sequence = continuous_state.get('sequence', 0) # get the sequence from the previous auto-continue cycle
31
+ async for llm_chunk in llm_response:
32
+ if hasattr(llm_chunk, 'choices') and llm_chunk.choices and hasattr(llm_chunk.choices[0], 'finish_reason'):
33
+ if llm_chunk.choices[0].finish_reason:
34
+ finish_reason = llm_chunk.choices[0].finish_reason
35
+ logging.info(f"StreamResp:LLM chunk response finish_reason={finish_reason}")
147
36
 
148
- async for chunk in llm_response:
149
- # Extract streaming metadata from chunks
150
- current_time = datetime.now(timezone.utc).timestamp()
151
- if streaming_metadata["first_chunk_time"] is None:
152
- streaming_metadata["first_chunk_time"] = current_time
153
- streaming_metadata["last_chunk_time"] = current_time
37
+ if hasattr(llm_chunk, 'choices') and llm_chunk.choices:
38
+ llm_chunk_msg = llm_chunk.choices[0].delta if hasattr(llm_chunk.choices[0], 'delta') else None
154
39
 
155
- # Extract metadata from chunk attributes
156
- if hasattr(chunk, 'created') and chunk.created:
157
- streaming_metadata["created"] = chunk.created
158
- if hasattr(chunk, 'model') and chunk.model:
159
- streaming_metadata["model"] = chunk.model
160
- if hasattr(chunk, 'usage') and chunk.usage:
161
- # Update usage information if available (including zero values)
162
- if hasattr(chunk.usage, 'prompt_tokens') and chunk.usage.prompt_tokens is not None:
163
- streaming_metadata["usage"]["prompt_tokens"] = chunk.usage.prompt_tokens
164
- if hasattr(chunk.usage, 'completion_tokens') and chunk.usage.completion_tokens is not None:
165
- streaming_metadata["usage"]["completion_tokens"] = chunk.usage.completion_tokens
166
- if hasattr(chunk.usage, 'total_tokens') and chunk.usage.total_tokens is not None:
167
- streaming_metadata["usage"]["total_tokens"] = chunk.usage.total_tokens
168
-
169
- if hasattr(chunk, 'choices') and chunk.choices and hasattr(chunk.choices[0], 'finish_reason') and \
170
- chunk.choices[0].finish_reason:
171
- finish_reason = chunk.choices[0].finish_reason
172
- logging.debug(f"Detected finish_reason: {finish_reason}")
173
-
174
- if hasattr(chunk, 'choices') and chunk.choices:
175
- delta = chunk.choices[0].delta if hasattr(chunk.choices[0], 'delta') else None
176
-
177
- # Check for and log Anthropic thinking content
178
- if delta and hasattr(delta, 'reasoning_content') and delta.reasoning_content:
179
- if not has_printed_thinking_prefix:
180
- # print("[THINKING]: ", end='', flush=True)
181
- has_printed_thinking_prefix = True
182
- # print(delta.reasoning_content, end='', flush=True)
183
- # Append reasoning to main content to be saved in the final message
184
- accumulated_content += delta.reasoning_content
185
-
186
- # Process content chunk
187
- if delta and hasattr(delta, 'content') and delta.content:
188
- chunk_content = delta.content
189
- # print(chunk_content, end='', flush=True)
40
+ if llm_chunk_msg and hasattr(llm_chunk_msg, 'content') and llm_chunk_msg.content:
41
+ chunk_content = llm_chunk_msg.content
190
42
  accumulated_content += chunk_content
191
- current_xml_content += chunk_content
192
-
193
- if not (config.max_xml_tool_calls > 0 and xml_tool_call_count >= config.max_xml_tool_calls):
194
- # Yield ONLY content chunk (don't save)
195
- now_chunk = datetime.now(timezone.utc).isoformat()
196
- yield {
197
- "sequence": __sequence,
198
- "message_id": None, "thread_id": thread_id, "type": "assistant",
199
- "is_llm_message": True,
200
- "content": to_json_string({"role": "assistant", "content": chunk_content}),
201
- "metadata": to_json_string({"stream_status": "chunk", "thread_run_id": thread_run_id}),
202
- "created_at": now_chunk, "updated_at": now_chunk
203
- }
204
- __sequence += 1
205
- else:
206
- logging.info("XML tool call limit reached - not yielding more content chunks")
207
- self.root_span.event(name="xml_tool_call_limit_reached", level="DEFAULT", status_message=(
208
- f"XML tool call limit reached - not yielding more content chunks"))
209
-
210
- # --- Process XML Tool Calls (if enabled and limit not reached) ---
211
- if config.xml_tool_calling and not (
212
- config.max_xml_tool_calls > 0 and xml_tool_call_count >= config.max_xml_tool_calls):
213
- xml_chunks = self._extract_xml_chunks(current_xml_content)
214
- for xml_chunk in xml_chunks:
215
- current_xml_content = current_xml_content.replace(xml_chunk, "", 1)
216
- xml_chunks_buffer.append(xml_chunk)
217
- result = self._parse_xml_tool_call(xml_chunk)
218
- if result:
219
- tool_call, parsing_details = result
220
- xml_tool_call_count += 1
221
- current_assistant_id = last_assistant_message_object[
222
- 'message_id'] if last_assistant_message_object else None
223
- context = self._create_tool_context(
224
- tool_call, tool_index, current_assistant_id, parsing_details
225
- )
226
-
227
- if config.execute_tools and config.execute_on_stream:
228
- # Save and Yield tool_started status
229
- started_msg_obj = await self._add_tool_start_message(context)
230
- if started_msg_obj: yield format_for_yield(started_msg_obj)
231
- yielded_tool_indices.add(tool_index) # Mark status as yielded
232
-
233
- execution_task = asyncio.create_task(self._execute_tool(tool_call))
234
- pending_tool_executions.append({
235
- "task": execution_task, "tool_call": tool_call,
236
- "tool_index": tool_index, "context": context
237
- })
238
- tool_index += 1
239
-
240
- if config.max_xml_tool_calls > 0 and xml_tool_call_count >= config.max_xml_tool_calls:
241
- logging.debug(f"Reached XML tool call limit ({config.max_xml_tool_calls})")
242
- finish_reason = "xml_tool_limit_reached"
243
- break # Stop processing more XML chunks in this delta
244
-
245
- # --- Process Native Tool Call Chunks ---
246
- if config.native_tool_calling and delta and hasattr(delta, 'tool_calls') and delta.tool_calls:
247
- for tool_call_chunk in delta.tool_calls:
248
- # Yield Native Tool Call Chunk (transient status, not saved)
249
- # ... (safe extraction logic for tool_call_data_chunk) ...
250
- tool_call_data_chunk = {} # Placeholder for extracted data
251
- if hasattr(tool_call_chunk, 'model_dump'):
252
- tool_call_data_chunk = tool_call_chunk.model_dump()
253
- else: # Manual extraction...
254
- if hasattr(tool_call_chunk, 'id'): tool_call_data_chunk['id'] = tool_call_chunk.id
255
- if hasattr(tool_call_chunk, 'index'): tool_call_data_chunk[
256
- 'index'] = tool_call_chunk.index
257
- if hasattr(tool_call_chunk, 'type'): tool_call_data_chunk['type'] = tool_call_chunk.type
258
- if hasattr(tool_call_chunk, 'function'):
259
- tool_call_data_chunk['function'] = {}
260
- if hasattr(tool_call_chunk.function, 'name'): tool_call_data_chunk['function'][
261
- 'name'] = tool_call_chunk.function.name
262
- if hasattr(tool_call_chunk.function, 'arguments'): tool_call_data_chunk['function'][
263
- 'arguments'] = tool_call_chunk.function.arguments if isinstance(
264
- tool_call_chunk.function.arguments, str) else to_json_string(
265
- tool_call_chunk.function.arguments)
266
-
267
- now_tool_chunk = datetime.now(timezone.utc).isoformat()
268
- yield {
269
- "message_id": None, "thread_id": thread_id, "type": "status", "is_llm_message": True,
270
- "content": to_json_string({"role": "assistant", "status_type": "tool_call_chunk",
271
- "tool_call_chunk": tool_call_data_chunk}),
272
- "metadata": to_json_string({"thread_run_id": thread_run_id}),
273
- "created_at": now_tool_chunk, "updated_at": now_tool_chunk
274
- }
275
-
276
- # --- Buffer and Execute Complete Native Tool Calls ---
277
- if not hasattr(tool_call_chunk, 'function'): continue
278
- idx = tool_call_chunk.index if hasattr(tool_call_chunk, 'index') else 0
279
- # ... (buffer update logic remains same) ...
280
- # ... (check complete logic remains same) ...
281
- has_complete_tool_call = False # Placeholder
282
- if (tool_calls_buffer.get(idx) and
283
- tool_calls_buffer[idx]['id'] and
284
- tool_calls_buffer[idx]['function']['name'] and
285
- tool_calls_buffer[idx]['function']['arguments']):
286
- try:
287
- safe_json_parse(tool_calls_buffer[idx]['function']['arguments'])
288
- has_complete_tool_call = True
289
- except json.JSONDecodeError:
290
- pass
291
-
292
- if has_complete_tool_call and config.execute_tools and config.execute_on_stream:
293
- current_tool = tool_calls_buffer[idx]
294
- tool_call_data = {
295
- "function_name": current_tool['function']['name'],
296
- "arguments": safe_json_parse(current_tool['function']['arguments']),
297
- "id": current_tool['id']
298
- }
299
- current_assistant_id = last_assistant_message_object[
300
- 'message_id'] if last_assistant_message_object else None
301
- context = self._create_tool_context(
302
- tool_call_data, tool_index, current_assistant_id
303
- )
304
-
305
- # Save and Yield tool_started status
306
- started_msg_obj = await self._add_tool_start_message(context)
307
- if started_msg_obj: yield format_for_yield(started_msg_obj)
308
- yielded_tool_indices.add(tool_index) # Mark status as yielded
309
-
310
- execution_task = asyncio.create_task(self._execute_tool(tool_call_data))
311
- pending_tool_executions.append({
312
- "task": execution_task, "tool_call": tool_call_data,
313
- "tool_index": tool_index, "context": context
314
- })
315
- tool_index += 1
316
-
317
- if finish_reason == "xml_tool_limit_reached":
318
- logging.info("Stopping stream processing after loop due to XML tool call limit")
319
- self.root_span.event(name="stopping_stream_processing_after_loop_due_to_xml_tool_call_limit",
320
- level="DEFAULT", status_message=(
321
- f"Stopping stream processing after loop due to XML tool call limit"))
322
- break
323
-
324
- # print() # Add a final newline after the streaming loop finishes
325
-
326
- # --- After Streaming Loop ---
327
-
328
- if (
329
- streaming_metadata["usage"]["total_tokens"] == 0
330
- ):
331
- logging.info("🔥 No usage data from provider, counting with litellm.token_counter")
332
43
 
333
- try:
334
- # prompt side
335
- # prompt_tokens = token_counter(
336
- # model=llm_model,
337
- # messages=prompt_messages # chat or plain; token_counter handles both
338
- # )
339
- #
340
- # # completion side
341
- # completion_tokens = token_counter(
342
- # model=llm_model,
343
- # text=accumulated_content or "" # empty string safe
344
- # )
44
+ xml_tool_call_count = len(self._extract_xml_chunks(accumulated_content))
45
+ if self.max_xml_tool_calls <= 0 or xml_tool_call_count < self.max_xml_tool_calls:
46
+ if use_assistant_chunk_msg:
47
+ message_data = {"role": "assistant", "content": chunk_content}
48
+ metadata = {"sequence": sequence}
49
+ assistant_chunk_msg = self.create_response_message(type="assistant_chunk", content=message_data,
50
+ is_llm_message=True, metadata=metadata)
51
+ yield assistant_chunk_msg
345
52
 
346
- # streaming_metadata["usage"]["prompt_tokens"] = prompt_tokens
347
- # streaming_metadata["usage"]["completion_tokens"] = completion_tokens
348
- # streaming_metadata["usage"]["total_tokens"] = prompt_tokens + completion_tokens
349
- #
350
- # logging.info(
351
- # f"🔥 Estimated tokens – prompt: {prompt_tokens}, "
352
- # f"completion: {completion_tokens}, total: {prompt_tokens + completion_tokens}"
353
- # )
354
- self.root_span.event(name="usage_calculated_with_litellm_token_counter", level="DEFAULT",
355
- status_message=(f"Usage calculated with litellm.token_counter"))
356
- except Exception as e:
357
- logging.warning(f"Failed to calculate usage: {str(e)}")
358
- self.root_span.event(name="failed_to_calculate_usage", level="WARNING",
359
- status_message=(f"Failed to calculate usage: {str(e)}"))
360
-
361
- # Wait for pending tool executions from streaming phase
362
- tool_results_buffer = [] # Stores (tool_call, result, tool_index, context)
363
- if pending_tool_executions:
364
- logging.info(f"Waiting for {len(pending_tool_executions)} pending streamed tool executions")
365
- self.root_span.event(name="waiting_for_pending_streamed_tool_executions", level="DEFAULT", status_message=(
366
- f"Waiting for {len(pending_tool_executions)} pending streamed tool executions"))
367
- # ... (asyncio.wait logic) ...
368
- pending_tasks = [execution["task"] for execution in pending_tool_executions]
369
- done, _ = await asyncio.wait(pending_tasks)
370
-
371
- for execution in pending_tool_executions:
372
- tool_idx = execution.get("tool_index", -1)
373
- context = execution["context"]
374
- tool_name = context.function_name
375
-
376
- # Check if status was already yielded during stream run
377
- if tool_idx in yielded_tool_indices:
378
- logging.debug(f"Status for tool index {tool_idx} already yielded.")
379
- # Still need to process the result for the buffer
380
- try:
381
- if execution["task"].done():
382
- result = execution["task"].result()
383
- context.result = result
384
- tool_results_buffer.append((execution["tool_call"], result, tool_idx, context))
385
-
386
- if tool_name in ['ask', 'complete']:
387
- logging.info(
388
- f"Terminating tool '{tool_name}' completed during streaming. Setting termination flag.")
389
- self.root_span.event(name="terminating_tool_completed_during_streaming",
390
- level="DEFAULT", status_message=(
391
- f"Terminating tool '{tool_name}' completed during streaming. Setting termination flag."))
392
- agent_should_terminate = True
393
-
394
- else: # Should not happen with asyncio.wait
395
- logging.warning(f"Task for tool index {tool_idx} not done after wait.")
396
- self.root_span.event(name="task_for_tool_index_not_done_after_wait", level="WARNING",
397
- status_message=(
398
- f"Task for tool index {tool_idx} not done after wait."))
399
- except Exception as e:
400
- logging.error(f"Error getting result for pending tool execution {tool_idx}: {str(e)}")
401
- self.root_span.event(name="error_getting_result_for_pending_tool_execution", level="ERROR",
402
- status_message=(
403
- f"Error getting result for pending tool execution {tool_idx}: {str(e)}"))
404
- context.error = e
405
- # Save and Yield tool error status message (even if started was yielded)
406
- error_msg_obj = await self._add_tool_error_message(context)
407
- if error_msg_obj: yield format_for_yield(error_msg_obj)
408
- continue # Skip further status yielding for this tool index
409
-
410
- # If status wasn't yielded before (shouldn't happen with current logic), yield it now
411
- try:
412
- if execution["task"].done():
413
- result = execution["task"].result()
414
- context.result = result
415
- tool_results_buffer.append((execution["tool_call"], result, tool_idx, context))
416
-
417
- # Check if this is a terminating tool
418
- if tool_name in ['ask', 'complete']:
419
- logging.info(
420
- f"Terminating tool '{tool_name}' completed during streaming. Setting termination flag.")
421
- self.root_span.event(name="terminating_tool_completed_during_streaming", level="DEFAULT",
422
- status_message=(
423
- f"Terminating tool '{tool_name}' completed during streaming. Setting termination flag."))
424
- agent_should_terminate = True
53
+ sequence += 1
54
+ else:
55
+ finish_reason = "xml_tool_limit_reached"
56
+ break
425
57
 
426
- # Save and Yield tool completed/failed status
427
- completed_msg_obj = await self._add_tool_completed_message(
428
- context, None)
429
- if completed_msg_obj: yield format_for_yield(completed_msg_obj)
430
- yielded_tool_indices.add(tool_idx)
431
- except Exception as e:
432
- logging.error(
433
- f"Error getting result/yielding status for pending tool execution {tool_idx}: {str(e)}")
434
- self.root_span.event(name="error_getting_result_yielding_status_for_pending_tool_execution",
435
- level="ERROR", status_message=(
436
- f"Error getting result/yielding status for pending tool execution {tool_idx}: {str(e)}"))
437
- context.error = e
438
- # Save and Yield tool error status
439
- error_msg_obj = await self._add_tool_error_message(context)
440
- if error_msg_obj: yield format_for_yield(error_msg_obj)
441
- yielded_tool_indices.add(tool_idx)
58
+ if len(accumulated_content) == 0:
59
+ logging.warning(f"StreamResp: LLM response_message content is empty")
442
60
 
443
- # Save and yield finish status if limit was reached
444
61
  if finish_reason == "xml_tool_limit_reached":
445
- finish_content = {"status_type": "finish", "finish_reason": "xml_tool_limit_reached"}
446
- finish_msg_obj = await self.add_response_message(
447
- type="status", content=finish_content,
448
- is_llm_message=False, metadata={"thread_run_id": thread_run_id}
449
- )
450
- if finish_msg_obj: yield format_for_yield(finish_msg_obj)
451
- logging.info(
452
- f"Stream finished with reason: xml_tool_limit_reached after {xml_tool_call_count} XML tool calls")
453
- self.root_span.event(name="stream_finished_with_reason_xml_tool_limit_reached_after_xml_tool_calls",
454
- level="DEFAULT", status_message=(
455
- f"Stream finished with reason: xml_tool_limit_reached after {xml_tool_call_count} XML tool calls"))
62
+ xml_chunks = self._extract_xml_chunks(accumulated_content)
63
+ if len(xml_chunks) > self.max_xml_tool_calls:
64
+ limited_chunks = xml_chunks[:self.max_xml_tool_calls]
65
+ if limited_chunks:
66
+ last_chunk = limited_chunks[-1]
67
+ last_chunk_pos = accumulated_content.find(last_chunk) + len(last_chunk)
68
+ accumulated_content = accumulated_content[:last_chunk_pos]
69
+ parsed_xml_data = self._parse_xml_tool_calls(accumulated_content)
456
70
 
457
- # Calculate if auto-continue is needed if the finish reason is length
458
71
  should_auto_continue = (can_auto_continue and finish_reason == 'length')
459
72
 
460
- # --- SAVE and YIELD Final Assistant Message ---
461
- # Only save assistant message if NOT auto-continuing due to length to avoid duplicate messages
462
- if accumulated_content and not should_auto_continue:
463
- # ... (Truncate accumulated_content logic) ...
464
- if config.max_xml_tool_calls > 0 and xml_tool_call_count >= config.max_xml_tool_calls and xml_chunks_buffer:
465
- last_xml_chunk = xml_chunks_buffer[-1]
466
- last_chunk_end_pos = accumulated_content.find(last_xml_chunk) + len(last_xml_chunk)
467
- if last_chunk_end_pos > 0:
468
- accumulated_content = accumulated_content[:last_chunk_end_pos]
469
-
470
- # ... (Extract complete_native_tool_calls logic) ...
471
- # Update complete_native_tool_calls from buffer (initialized earlier)
472
- if config.native_tool_calling:
473
- for idx, tc_buf in tool_calls_buffer.items():
474
- if tc_buf['id'] and tc_buf['function']['name'] and tc_buf['function']['arguments']:
475
- try:
476
- args = safe_json_parse(tc_buf['function']['arguments'])
477
- complete_native_tool_calls.append({
478
- "id": tc_buf['id'], "type": "function",
479
- "function": {"name": tc_buf['function']['name'], "arguments": args}
480
- })
481
- except json.JSONDecodeError:
482
- continue
73
+ self.root_span.event(name=f"stream_processor_start[{self.task_no}]({auto_continue_count})",level="DEFAULT",
74
+ status_message=f"finish_reason={finish_reason}, tool_exec_strategy={self.tool_execution_strategy}, "
75
+ f"parsed_xml_data_len={len(parsed_xml_data)}, accumulated_content={len(accumulated_content)}, "
76
+ f"should_auto_continue={should_auto_continue}")
483
77
 
484
- message_data = { # Dict to be saved in 'content'
485
- "role": "assistant", "content": accumulated_content,
486
- "tool_calls": complete_native_tool_calls or None
487
- }
488
-
489
- last_assistant_message_object = await self.add_response_message(type="assistant", content=message_data,
490
- is_llm_message=True, metadata={"thread_run_id": thread_run_id}
491
- )
492
-
493
- if last_assistant_message_object:
494
- # Yield the complete saved object, adding stream_status metadata just for yield
495
- yield_metadata = ensure_dict(last_assistant_message_object.get('metadata'), {})
496
- yield_metadata['stream_status'] = 'complete'
497
- # Format the message for yielding
498
- yield_message = last_assistant_message_object.copy()
499
- yield_message['metadata'] = yield_metadata
500
- yield format_for_yield(yield_message)
501
- else:
502
- logging.error(f"Failed to save final assistant message for thread {thread_id}")
503
- self.root_span.event(name="failed_to_save_final_assistant_message_for_thread", level="ERROR",
504
- status_message=(f"Failed to save final assistant message for thread {thread_id}"))
505
- # Save and yield an error status
506
- err_content = {"role": "system", "status_type": "error",
507
- "message": "Failed to save final assistant message"}
508
- err_msg_obj = await self.add_response_message(
509
- type="status", content=err_content,
510
- is_llm_message=False, metadata={"thread_run_id": thread_run_id}
511
- )
512
- if err_msg_obj: yield format_for_yield(err_msg_obj)
513
-
514
- # --- Process All Tool Results Now ---
515
- if config.execute_tools:
516
- final_tool_calls_to_process = []
517
- # ... (Gather final_tool_calls_to_process from native and XML buffers) ...
518
- # Gather native tool calls from buffer
519
- if config.native_tool_calling and complete_native_tool_calls:
520
- for tc in complete_native_tool_calls:
521
- final_tool_calls_to_process.append({
522
- "function_name": tc["function"]["name"],
523
- "arguments": tc["function"]["arguments"], # Already parsed object
524
- "id": tc["id"]
525
- })
526
- # Gather XML tool calls from buffer (up to limit)
527
- parsed_xml_data = []
528
- if config.xml_tool_calling:
529
- # Reparse remaining content just in case (should be empty if processed correctly)
530
- xml_chunks = self._extract_xml_chunks(current_xml_content)
531
- xml_chunks_buffer.extend(xml_chunks)
532
- # Process only chunks not already handled in the stream loop
533
- remaining_limit = config.max_xml_tool_calls - xml_tool_call_count if config.max_xml_tool_calls > 0 else len(
534
- xml_chunks_buffer)
535
- xml_chunks_to_process = xml_chunks_buffer[:remaining_limit] # Ensure limit is respected
536
-
537
- for chunk in xml_chunks_to_process:
538
- parsed_result = self._parse_xml_tool_call(chunk)
539
- if parsed_result:
540
- tool_call, parsing_details = parsed_result
541
- # Avoid adding if already processed during streaming
542
- if not any(exec['tool_call'] == tool_call for exec in pending_tool_executions):
543
- final_tool_calls_to_process.append(tool_call)
544
- parsed_xml_data.append({'tool_call': tool_call, 'parsing_details': parsing_details})
545
-
546
- all_tool_data_map = {} # tool_index -> {'tool_call': ..., 'parsing_details': ...}
547
- # Add native tool data
548
- native_tool_index = 0
549
- if config.native_tool_calling and complete_native_tool_calls:
550
- for tc in complete_native_tool_calls:
551
- # Find the corresponding entry in final_tool_calls_to_process if needed
552
- # For now, assume order matches if only native used
553
- exec_tool_call = {
554
- "function_name": tc["function"]["name"],
555
- "arguments": tc["function"]["arguments"],
556
- "id": tc["id"]
557
- }
558
- all_tool_data_map[native_tool_index] = {"tool_call": exec_tool_call, "parsing_details": None}
559
- native_tool_index += 1
78
+ assistant_msg = None
79
+ if accumulated_content and not should_auto_continue:
80
+ message_data = {"role": "assistant", "content": accumulated_content}
81
+ assistant_msg = self.add_response_message(type="assistant", content=message_data,
82
+ is_llm_message=True)
83
+ yield assistant_msg
560
84
 
561
- # Add XML tool data
562
- xml_tool_index_start = native_tool_index
563
- for idx, item in enumerate(parsed_xml_data):
564
- all_tool_data_map[xml_tool_index_start + idx] = item
85
+ tool_calls_to_execute = [item['tool_call'] for item in parsed_xml_data]
86
+ if len(tool_calls_to_execute) > 0:
87
+ tool_results = await self._execute_tools(tool_calls_to_execute, self.tool_execution_strategy)
565
88
 
566
- tool_results_map = {} # tool_index -> (tool_call, result, context)
89
+ tool_index = 0
90
+ for i, (returned_tool_call, tool_result) in enumerate(tool_results):
91
+ parsed_xml_item = parsed_xml_data[i]
92
+ tool_call = parsed_xml_item['tool_call']
93
+ parsing_details = parsed_xml_item['parsing_details']
94
+ assistant_msg_id = assistant_msg['message_id'] if assistant_msg else None
567
95
 
568
- # Populate from buffer if executed on stream
569
- if config.execute_on_stream and tool_results_buffer:
570
- logging.info(f"Processing {len(tool_results_buffer)} buffered tool results")
571
- self.root_span.event(name="processing_buffered_tool_results", level="DEFAULT",
572
- status_message=(f"Processing {len(tool_results_buffer)} buffered tool results"))
573
- for tool_call, result, tool_idx, context in tool_results_buffer:
574
- if last_assistant_message_object: context.assistant_message_id = last_assistant_message_object[
575
- 'message_id']
576
- tool_results_map[tool_idx] = (tool_call, result, context)
96
+ tool_context = self._create_tool_context(tool_call, tool_index, assistant_msg_id, parsing_details)
97
+ tool_context.result = tool_result
577
98
 
578
- # Or execute now if not streamed
579
- elif final_tool_calls_to_process and not config.execute_on_stream:
580
- logging.info(
581
- f"Executing {len(final_tool_calls_to_process)} tools ({config.tool_execution_strategy}) after stream")
582
- self.root_span.event(name="executing_tools_after_stream", level="DEFAULT", status_message=(
583
- f"Executing {len(final_tool_calls_to_process)} tools ({config.tool_execution_strategy}) after stream"))
584
- results_list = await self._execute_tools(final_tool_calls_to_process,
585
- config.tool_execution_strategy)
586
- current_tool_idx = 0
587
- for tc, res in results_list:
588
- # Map back using all_tool_data_map which has correct indices
589
- if current_tool_idx in all_tool_data_map:
590
- tool_data = all_tool_data_map[current_tool_idx]
591
- context = self._create_tool_context(
592
- tc, current_tool_idx,
593
- last_assistant_message_object['message_id'] if last_assistant_message_object else None,
594
- tool_data.get('parsing_details')
595
- )
596
- context.result = res
597
- tool_results_map[current_tool_idx] = (tc, res, context)
598
- else:
599
- logging.warning(f"Could not map result for tool index {current_tool_idx}")
600
- self.root_span.event(name="could_not_map_result_for_tool_index", level="WARNING",
601
- status_message=(f"Could not map result for tool index {current_tool_idx}"))
602
- current_tool_idx += 1
99
+ tool_start_msg = self._add_tool_start_message(tool_context)
100
+ yield format_for_yield(tool_start_msg)
603
101
 
604
- # Save and Yield each result message
605
- if tool_results_map:
606
- logging.info(f"Saving and yielding {len(tool_results_map)} final tool result messages")
607
- self.root_span.event(name="saving_and_yielding_final_tool_result_messages", level="DEFAULT",
608
- status_message=(
609
- f"Saving and yielding {len(tool_results_map)} final tool result messages"))
610
- for tool_idx in sorted(tool_results_map.keys()):
611
- tool_call, result, context = tool_results_map[tool_idx]
612
- context.result = result
613
- if not context.assistant_message_id and last_assistant_message_object:
614
- context.assistant_message_id = last_assistant_message_object['message_id']
102
+ tool_message = self._add_tool_messsage(tool_call, tool_result, self.xml_adding_strategy, assistant_msg_id, parsing_details)
615
103
 
616
- # Yield start status ONLY IF executing non-streamed (already yielded if streamed)
617
- if not config.execute_on_stream and tool_idx not in yielded_tool_indices:
618
- started_msg_obj = await self._add_tool_start_message(context)
619
- if started_msg_obj: yield format_for_yield(started_msg_obj)
620
- yielded_tool_indices.add(tool_idx) # Mark status yielded
104
+ tool_completed_msg = self._add_tool_completed_message(tool_context, tool_message['message_id'])
105
+ yield format_for_yield(tool_completed_msg)
621
106
 
622
- # Save the tool result message to DB
623
- saved_tool_result_object = await self._add_tool_messsage(tool_call, result, config.xml_adding_strategy,
624
- context.assistant_message_id, context.parsing_details
625
- )
107
+ yield format_for_yield(tool_message)
626
108
 
627
- # Yield completed/failed status (linked to saved result ID if available)
628
- completed_msg_obj = await self._add_tool_completed_message(
629
- context,
630
- saved_tool_result_object['message_id'] if saved_tool_result_object else None
631
- )
632
- if completed_msg_obj: yield format_for_yield(completed_msg_obj)
633
- # Don't add to yielded_tool_indices here, completion status is separate yield
109
+ if tool_completed_msg["metadata"].get("agent_should_terminate") == "true":
110
+ finish_reason = "completed"
111
+ break
634
112
 
635
- # Yield the saved tool result object
636
- if saved_tool_result_object:
637
- tool_result_message_objects[tool_idx] = saved_tool_result_object
638
- yield format_for_yield(saved_tool_result_object)
639
- else:
640
- logging.error(
641
- f"Failed to save tool result for index {tool_idx}, not yielding result message.")
642
- self.root_span.event(name="failed_to_save_tool_result_for_index", level="ERROR",
643
- status_message=(
644
- f"Failed to save tool result for index {tool_idx}, not yielding result message."))
645
- # Optionally yield error status for saving failure?
113
+ tool_index += 1
114
+ else:
115
+ finish_reason = "non_tool_call"
116
+ logging.warning(f"StreamResp: tool_calls is empty, No Tool need to call !")
646
117
 
647
- # --- Final Finish Status ---
648
- if finish_reason and finish_reason != "xml_tool_limit_reached":
118
+ if finish_reason:
649
119
  finish_content = {"status_type": "finish", "finish_reason": finish_reason}
650
- finish_msg_obj = await self.add_response_message(
651
- type="status", content=finish_content,
652
- is_llm_message=False, metadata={"thread_run_id": thread_run_id}
653
- )
654
- if finish_msg_obj: yield format_for_yield(finish_msg_obj)
655
-
656
- # Check if agent should terminate after processing pending tools
657
- if agent_should_terminate:
658
- logging.info(
659
- "Agent termination requested after executing ask/complete tool. Stopping further processing.")
660
- self.root_span.event(name="agent_termination_requested", level="DEFAULT",
661
- status_message="Agent termination requested after executing ask/complete tool. Stopping further processing.")
662
-
663
- # Set finish reason to indicate termination
664
- finish_reason = "agent_terminated"
665
-
666
- # Save and yield termination status
667
- finish_content = {"status_type": "finish", "finish_reason": "agent_terminated"}
668
- finish_msg_obj = await self.add_response_message(
669
- type="status", content=finish_content,
670
- is_llm_message=False, metadata={"thread_run_id": thread_run_id}
671
- )
672
- if finish_msg_obj: yield format_for_yield(finish_msg_obj)
673
-
674
- # Save assistant_response_end BEFORE terminating
675
- if last_assistant_message_object:
676
- try:
677
- # Calculate response time if we have timing data
678
- if streaming_metadata["first_chunk_time"] and streaming_metadata["last_chunk_time"]:
679
- streaming_metadata["response_ms"] = (streaming_metadata["last_chunk_time"] -
680
- streaming_metadata["first_chunk_time"]) * 1000
681
-
682
- # Create a LiteLLM-like response object for streaming (before termination)
683
- # Check if we have any actual usage data
684
- has_usage_data = (
685
- streaming_metadata["usage"]["prompt_tokens"] > 0 or
686
- streaming_metadata["usage"]["completion_tokens"] > 0 or
687
- streaming_metadata["usage"]["total_tokens"] > 0
688
- )
689
-
690
- assistant_end_content = {
691
- "choices": [
692
- {
693
- "finish_reason": finish_reason or "stop",
694
- "index": 0,
695
- "message": {
696
- "role": "assistant",
697
- "content": accumulated_content,
698
- "tool_calls": complete_native_tool_calls or None
699
- }
700
- }
701
- ],
702
- "created": streaming_metadata.get("created"),
703
- "model": streaming_metadata.get("model", llm_model),
704
- "usage": streaming_metadata["usage"], # Always include usage like LiteLLM does
705
- "streaming": True, # Add flag to indicate this was reconstructed from streaming
706
- }
707
-
708
- # Only include response_ms if we have timing data
709
- if streaming_metadata.get("response_ms"):
710
- assistant_end_content["response_ms"] = streaming_metadata["response_ms"]
711
-
712
- await self.add_response_message(
713
- type="assistant_response_end",
714
- content=assistant_end_content,
715
- is_llm_message=False,
716
- metadata={"thread_run_id": thread_run_id}
717
- )
718
- logging.info("Assistant response end saved for stream (before termination)")
719
- except Exception as e:
720
- logging.error(f"Error saving assistant response end for stream (before termination): {str(e)}")
721
- self.root_span.event(name="error_saving_assistant_response_end_for_stream_before_termination",
722
- level="ERROR", status_message=(
723
- f"Error saving assistant response end for stream (before termination): {str(e)}"))
724
-
725
- # Skip all remaining processing and go to finally block
726
- return
727
-
728
- # --- Save and Yield assistant_response_end ---
729
- # Only save assistant_response_end if not auto-continuing (response is actually complete)
730
- if not should_auto_continue:
731
- if last_assistant_message_object: # Only save if assistant message was saved
732
- try:
733
- # Calculate response time if we have timing data
734
- if streaming_metadata["first_chunk_time"] and streaming_metadata["last_chunk_time"]:
735
- streaming_metadata["response_ms"] = (streaming_metadata["last_chunk_time"] -
736
- streaming_metadata["first_chunk_time"]) * 1000
737
-
738
- # Create a LiteLLM-like response object for streaming
739
- # Check if we have any actual usage data
740
- has_usage_data = (
741
- streaming_metadata["usage"]["prompt_tokens"] > 0 or
742
- streaming_metadata["usage"]["completion_tokens"] > 0 or
743
- streaming_metadata["usage"]["total_tokens"] > 0
744
- )
745
-
746
- assistant_end_content = {
747
- "choices": [
748
- {
749
- "finish_reason": finish_reason or "stop",
750
- "index": 0,
751
- "message": {
752
- "role": "assistant",
753
- "content": accumulated_content,
754
- "tool_calls": complete_native_tool_calls or None
755
- }
756
- }
757
- ],
758
- "created": streaming_metadata.get("created"),
759
- "model": streaming_metadata.get("model", llm_model),
760
- "usage": streaming_metadata["usage"], # Always include usage like LiteLLM does
761
- "streaming": True, # Add flag to indicate this was reconstructed from streaming
762
- }
763
-
764
- # Only include response_ms if we have timing data
765
- if streaming_metadata.get("response_ms"):
766
- assistant_end_content["response_ms"] = streaming_metadata["response_ms"]
767
-
768
- await self.add_response_message(
769
- type="assistant_response_end",
770
- content=assistant_end_content,
771
- is_llm_message=False,
772
- metadata={"thread_run_id": thread_run_id}
773
- )
774
- logging.info("Assistant response end saved for stream")
775
- except Exception as e:
776
- logging.error(f"Error saving assistant response end for stream: {str(e)}")
777
- self.root_span.event(name="error_saving_assistant_response_end_for_stream", level="ERROR",
778
- status_message=(f"Error saving assistant response end for stream: {str(e)}"))
779
-
120
+ finish_msg = self.add_response_message(type="status", content=finish_content, is_llm_message=False)
121
+ yield format_for_yield(finish_msg)
780
122
  except Exception as e:
781
- logging.error(f"Error processing stream: {str(e)}", exc_info=True)
782
- self.root_span.event(name="error_processing_stream", level="ERROR",
783
- status_message=(f"Error processing stream: {str(e)}"))
784
- # Save and yield error status message
123
+ logging.error(f"StreamResp: Process response llm_content: {accumulated_content}")
124
+ handle_error(e)
125
+ self.root_span.event(name="stream_response_process_error", level="ERROR",
126
+ status_message=f"Process streaming response error: {e}",
127
+ metadata={"content": accumulated_content})
785
128
 
786
- err_content = {"role": "system", "status_type": "error", "message": str(e)}
787
- if (not "AnthropicException - Overloaded" in str(e)):
788
- err_msg_obj = await self.add_response_message(
789
- type="status", content=err_content,
790
- is_llm_message=False,
791
- metadata={"thread_run_id": thread_run_id if 'thread_run_id' in locals() else None}
792
- )
793
- if err_msg_obj: yield format_for_yield(err_msg_obj) # Yield the saved error message
794
- # Re-raise the same exception (not a new one) to ensure proper error propagation
795
- logging.critical(f"Re-raising error to stop further processing: {str(e)}")
796
- self.root_span.event(name="re_raising_error_to_stop_further_processing", level="ERROR",
797
- status_message=(f"Re-raising error to stop further processing: {str(e)}"))
798
- else:
799
- logging.error(f"AnthropicException - Overloaded detected - Falling back to OpenRouter: {str(e)}",
800
- exc_info=True)
801
- self.root_span.event(name="anthropic_exception_overloaded_detected", level="ERROR", status_message=(
802
- f"AnthropicException - Overloaded detected - Falling back to OpenRouter: {str(e)}"))
803
- raise # Use bare 'raise' to preserve the original exception with its traceback
129
+ content = {"role": "system", "status_type": "error", "message": f"Process streaming response error: {e}"}
130
+ error_msg = self.add_response_message(type="status", content=content, is_llm_message=False)
131
+ yield format_for_yield(error_msg)
804
132
 
133
+ raise # Use bare 'raise' to preserve the original exception with its traceback
805
134
  finally:
806
- # Update continuous state for potential auto-continue
807
135
  if should_auto_continue:
808
136
  continuous_state['accumulated_content'] = accumulated_content
809
- continuous_state['sequence'] = __sequence
810
-
811
- logging.info(f"Updated continuous state for auto-continue with {len(accumulated_content)} chars")
812
- else:
813
- # Save and Yield the final thread_run_end status (only if not auto-continuing and finish_reason is not 'length')
814
- try:
815
- end_content = {"status_type": "thread_run_end"}
816
- end_msg_obj = await self.add_response_message(
817
- type="status", content=end_content,
818
- is_llm_message=False,
819
- metadata={"thread_run_id": thread_run_id if 'thread_run_id' in locals() else None}
820
- )
821
- if end_msg_obj: yield format_for_yield(end_msg_obj)
822
- except Exception as final_e:
823
- logging.error(f"Error in finally block: {str(final_e)}", exc_info=True)
824
- self.root_span.event(name="error_in_finally_block", level="ERROR",
825
- status_message=(f"Error in finally block: {str(final_e)}"))
137
+ continuous_state['assistant_msg_sequence'] = sequence
138
+ logging.warning(f"StreamResp: Updated continuous state for auto-continue with {len(accumulated_content)} chars")