unique_orchestrator 1.7.3.tar.gz → 1.7.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of unique_orchestrator might be problematic.

@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.7.5] - 2025-11-05
+- Adding functionality to include user metadata into user/system prompts of the orchestrator
+
+## [1.7.4] - 2025-11-04
+- Update and adapt to toolkit 1.23.0 (refactor sub agents implementation)
+
 ## [1.7.3] - 2025-11-03
 - Fixed an issue where new assistant messages were not properly generated during streaming outputs with tool calls; the orchestrator now correctly creates messages via `_create_new_assistant_message_if_loop_response_contains_content` when loop_response includes text and tool invocations.
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unique_orchestrator
-Version: 1.7.3
+Version: 1.7.5
 Summary:
 License: Proprietary
 Author: Andreas Hauri
@@ -20,7 +20,7 @@ Requires-Dist: unique-internal-search (>=1.0.1,<2.0.0)
 Requires-Dist: unique-sdk (>=0.10.34,<0.11.0)
 Requires-Dist: unique-stock-ticker (>=1.0.2,<2.0.0)
 Requires-Dist: unique-swot (>=0.1.0,<0.2.0)
-Requires-Dist: unique-toolkit (>=1.22.2,<2.0.0)
+Requires-Dist: unique-toolkit (>=1.23.0,<2.0.0)
 Requires-Dist: unique-web-search (>=1.3.1,<2.0.0)
 Description-Content-Type: text/markdown
 
@@ -34,6 +34,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.7.5] - 2025-11-05
+- Adding functionality to include user metadata into user/system prompts of the orchestrator
+
+## [1.7.4] - 2025-11-04
+- Update and adapt to toolkit 1.23.0 (refactor sub agents implementation)
+
 ## [1.7.3] - 2025-11-03
 - Fixed an issue where new assistant messages were not properly generated during streaming outputs with tool calls; the orchestrator now correctly creates messages via `_create_new_assistant_message_if_loop_response_contains_content` when loop_response includes text and tool invocations.
 
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "unique_orchestrator"
-version = "1.7.3"
+version = "1.7.5"
 description = ""
 authors = ["Andreas Hauri <andreas.hauri@unique.ai>"]
 readme = ["README.md", "CHANGELOG.md"]
@@ -15,7 +15,7 @@ python-dotenv = "^1.0.1"
 pytest = "^8.4.1"
 unique-sdk = "^0.10.34"
 
-unique-toolkit = "^1.22.2"
+unique-toolkit = "^1.23.0"
 unique-stock-ticker = "^1.0.2"
 unique-follow-up-questions = "^1.1.2"
 unique-internal-search = "^1.0.1"
@@ -163,6 +163,12 @@ class UniqueAIPromptConfig(BaseModel):
         description="The user message prompt template as a Jinja2 template string.",
     )
 
+    user_metadata: list[str] = Field(
+        default=[],
+        title="User Metadata",
+        description="User metadata fields to be ingested in the system prompt.",
+    )
+
 
 class UniqueAIServices(BaseModel):
     """Determine the services the agent is using
@@ -0,0 +1,257 @@
+from unittest.mock import MagicMock
+
+import pytest
+
+
+class TestGetFilteredUserMetadata:
+    """Test suite for UniqueAI._get_filtered_user_metadata method"""
+
+    @pytest.fixture
+    def mock_unique_ai(self):
+        """Create a minimal UniqueAI instance with mocked dependencies"""
+        # Lazy import to avoid heavy dependencies at module import time
+        from unique_orchestrator.unique_ai import UniqueAI
+
+        mock_logger = MagicMock()
+
+        # Create minimal event structure
+        dummy_event = MagicMock()
+        dummy_event.payload.assistant_message.id = "assist_1"
+        dummy_event.payload.user_message.text = "query"
+
+        # Create minimal config structure
+        mock_config = MagicMock()
+        mock_config.agent.prompt_config.user_metadata = []
+
+        # Create minimal required dependencies
+        mock_chat_service = MagicMock()
+        mock_content_service = MagicMock()
+        mock_debug_info_manager = MagicMock()
+        mock_reference_manager = MagicMock()
+        mock_thinking_manager = MagicMock()
+        mock_tool_manager = MagicMock()
+        mock_history_manager = MagicMock()
+        mock_evaluation_manager = MagicMock()
+        mock_postprocessor_manager = MagicMock()
+        mock_streaming_handler = MagicMock()
+
+        # Instantiate UniqueAI
+        ua = UniqueAI(
+            logger=mock_logger,
+            event=dummy_event,
+            config=mock_config,
+            chat_service=mock_chat_service,
+            content_service=mock_content_service,
+            debug_info_manager=mock_debug_info_manager,
+            streaming_handler=mock_streaming_handler,
+            reference_manager=mock_reference_manager,
+            thinking_manager=mock_thinking_manager,
+            tool_manager=mock_tool_manager,
+            history_manager=mock_history_manager,
+            evaluation_manager=mock_evaluation_manager,
+            postprocessor_manager=mock_postprocessor_manager,
+            mcp_servers=[],
+        )
+
+        return ua
+
+    def test_returns_empty_dict_when_config_is_empty_list(self, mock_unique_ai):
+        """Test that empty dict is returned when config.user_metadata is an empty list"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = []
+        mock_unique_ai._event.payload.user_metadata = {
+            "department": "Engineering",
+            "role": "Developer",
+        }
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        assert result == {}
+        assert isinstance(result, dict)
+
+    def test_returns_empty_dict_when_user_metadata_is_none(self, mock_unique_ai):
+        """Test that empty dict is returned when user_metadata is None"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = [
+            "department",
+            "role",
+        ]
+        mock_unique_ai._event.payload.user_metadata = None
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        assert result == {}
+        assert isinstance(result, dict)
+
+    def test_returns_empty_dict_when_both_config_and_metadata_are_empty(
+        self, mock_unique_ai
+    ):
+        """Test that empty dict is returned when both config and user_metadata are empty/None"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = []
+        mock_unique_ai._event.payload.user_metadata = None
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        assert result == {}
+        assert isinstance(result, dict)
+
+    def test_filters_metadata_to_include_only_configured_keys(self, mock_unique_ai):
+        """Test that only keys specified in config are included in the result"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = [
+            "department",
+            "role",
+        ]
+        mock_unique_ai._event.payload.user_metadata = {
+            "department": "Engineering",
+            "role": "Developer",
+            "location": "San Francisco",
+            "salary": "100000",
+        }
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        assert result == {"department": "Engineering", "role": "Developer"}
+        assert "location" not in result
+        assert "salary" not in result
+        # Verify all values are strings
+        assert all(isinstance(v, str) for v in result.values())
+
+    def test_returns_only_existing_keys_from_user_metadata(self, mock_unique_ai):
+        """Test that keys in config but not in user_metadata are not included"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = [
+            "department",
+            "role",
+            "team",
+            "manager",
+        ]
+        mock_unique_ai._event.payload.user_metadata = {
+            "department": "Engineering",
+            "role": "Developer",
+        }
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        assert result == {"department": "Engineering", "role": "Developer"}
+        assert "team" not in result
+        assert "manager" not in result
+
+    def test_handles_single_key_in_config(self, mock_unique_ai):
+        """Test filtering with a single key in config"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = ["department"]
+        mock_unique_ai._event.payload.user_metadata = {
+            "department": "Engineering",
+            "role": "Developer",
+        }
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        assert result == {"department": "Engineering"}
+        assert isinstance(result["department"], str)
+
+    def test_handles_string_values(self, mock_unique_ai):
+        """Test that string values in user_metadata are preserved"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = [
+            "name",
+            "email",
+            "department",
+            "title",
+        ]
+        mock_unique_ai._event.payload.user_metadata = {
+            "name": "John Doe",
+            "email": "john.doe@example.com",
+            "department": "Engineering",
+            "title": "Senior Developer",
+            "ignored": "This should not appear",
+        }
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        assert result == {
+            "name": "John Doe",
+            "email": "john.doe@example.com",
+            "department": "Engineering",
+            "title": "Senior Developer",
+        }
+        assert "ignored" not in result
+        # Verify all values are strings
+        assert all(isinstance(v, str) for v in result.values())
+
+    def test_handles_empty_dict_user_metadata(self, mock_unique_ai):
+        """Test behavior when user_metadata is an empty dict"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = ["department"]
+        mock_unique_ai._event.payload.user_metadata = {}
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        assert result == {}
+
+    def test_handles_empty_string_values(self, mock_unique_ai):
+        """Test that empty string values in user_metadata are preserved if key is in config"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = [
+            "department",
+            "role",
+        ]
+        mock_unique_ai._event.payload.user_metadata = {
+            "department": "Engineering",
+            "role": "",
+        }
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        assert result == {"department": "Engineering", "role": ""}
+        assert isinstance(result["role"], str)
+
+    def test_preserves_original_metadata_unchanged(self, mock_unique_ai):
+        """Test that the original user_metadata dict is not modified"""
+        original_metadata = {
+            "department": "Engineering",
+            "role": "Developer",
+            "location": "San Francisco",
+        }
+        mock_unique_ai._config.agent.prompt_config.user_metadata = ["department"]
+        mock_unique_ai._event.payload.user_metadata = original_metadata.copy()
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        # Original should still have all keys
+        assert mock_unique_ai._event.payload.user_metadata == original_metadata
+        # Result should only have filtered key
+        assert result == {"department": "Engineering"}
+
+    def test_handles_special_characters_in_values(self, mock_unique_ai):
+        """Test that special characters in string values are preserved"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = [
+            "description",
+            "notes",
+        ]
+        mock_unique_ai._event.payload.user_metadata = {
+            "description": "User with special chars: @#$%^&*()",
+            "notes": "Multi-line\ntext\twith\ttabs",
+            "other": "excluded",
+        }
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        assert result == {
+            "description": "User with special chars: @#$%^&*()",
+            "notes": "Multi-line\ntext\twith\ttabs",
+        }
+        assert all(isinstance(v, str) for v in result.values())
+
+    def test_return_type_is_dict_str_str(self, mock_unique_ai):
+        """Test that return type is dict[str, str]"""
+        mock_unique_ai._config.agent.prompt_config.user_metadata = [
+            "department",
+            "role",
+        ]
+        mock_unique_ai._event.payload.user_metadata = {
+            "department": "Engineering",
+            "role": "Developer",
+        }
+
+        result = mock_unique_ai._get_filtered_user_metadata()
+
+        # Check it's a dict
+        assert isinstance(result, dict)
+        # Check all keys are strings
+        assert all(isinstance(k, str) for k in result.keys())
+        # Check all values are strings
+        assert all(isinstance(v, str) for v in result.values())
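
The behavior these tests pin down is a plain dictionary filter. As a standalone sketch (function name hypothetical, logic mirroring the `_get_filtered_user_metadata` implementation shown further below):

def filter_user_metadata(
    configured_keys: list[str],
    user_metadata: dict | None,
) -> dict[str, str]:
    # No configured keys, or no metadata on the event: nothing to inject.
    if not configured_keys or user_metadata is None:
        return {}
    # Keep only configured keys and coerce every value to str.
    return {k: str(v) for k, v in user_metadata.items() if k in configured_keys}


assert filter_user_metadata([], {"role": "Developer"}) == {}
assert filter_user_metadata(["role"], None) == {}
assert filter_user_metadata(
    ["role", "team"], {"role": "Developer", "salary": 100000}
) == {"role": "Developer"}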
@@ -228,7 +228,9 @@ class UniqueAI:
             self._logger.debug(
                 "Tools were called we process them and do not exit the loop"
             )
-            await self._create_new_assistant_message_if_loop_response_contains_content(loop_response)
+            await self._create_new_assistant_message_if_loop_response_contains_content(
+                loop_response
+            )
 
             return await self._handle_tool_calls(loop_response)
 
@@ -268,6 +270,8 @@ class UniqueAI:
             mcp_server.user_prompt for mcp_server in self._mcp_servers
         ]
 
+        user_metadata = self._get_filtered_user_metadata()
+
         tool_descriptions = self._tool_manager.get_tool_prompts()
 
         query = self._event.payload.user_message.text
@@ -291,12 +295,11 @@ class UniqueAI:
             tool_descriptions_with_user_prompts=tool_descriptions_with_user_prompts,
             use_sub_agent_references=use_sub_agent_references,
             sub_agent_referencing_instructions=sub_agent_referencing_instructions,
+            user_metadata=user_metadata,
         )
         return user_msg
 
-    async def _render_system_prompt(
-        self,
-    ) -> str:
+    async def _render_system_prompt(self) -> str:
         # TODO: Collect tool information here and adapt to system prompt
         tool_descriptions = self._tool_manager.get_tool_prompts()
 
@@ -311,6 +314,8 @@ class UniqueAI:
 
         date_string = datetime.now().strftime("%A %B %d, %Y")
 
+        user_metadata = self._get_filtered_user_metadata()
+
         mcp_server_system_prompts = [
             mcp_server.system_prompt for mcp_server in self._mcp_servers
         ]
@@ -339,6 +344,7 @@ class UniqueAI:
             mcp_server_system_prompts=mcp_server_system_prompts,
             use_sub_agent_references=use_sub_agent_references,
             sub_agent_referencing_instructions=sub_agent_referencing_instructions,
+            user_metadata=user_metadata,
         )
         return system_message
 
@@ -444,6 +450,26 @@ class UniqueAI:
             )
         )
 
+    def _get_filtered_user_metadata(self) -> dict[str, str]:
+        """
+        Filter user metadata to only include keys specified in the agent's prompt config.
+
+        Returns:
+            Dictionary containing only the metadata keys that are configured to be included.
+        """
+        user_metadata = {}
+        if (
+            self._config.agent.prompt_config.user_metadata
+            and self._event.payload.user_metadata is not None
+        ):
+            # Filter metadata to only include selected keys
+            user_metadata = {
+                k: str(v)
+                for k, v in self._event.payload.user_metadata.items()
+                if k in self._config.agent.prompt_config.user_metadata
+            }
+        return user_metadata
+
 
 class UniqueAIResponsesApi(UniqueAI):
     def __init__(
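
Two details of this implementation are worth noting: values are coerced with `str(v)`, so the declared `dict[str, str]` return type holds even if the platform delivers non-string metadata, and configured keys that are absent from the event payload are silently dropped rather than raising a `KeyError`.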
@@ -1,6 +1,6 @@
 import os
 from logging import Logger
-from typing import NamedTuple
+from typing import NamedTuple, cast
 
 from openai import AsyncOpenAI
 from unique_follow_up_questions.follow_up_postprocessor import (
@@ -46,11 +46,13 @@ from unique_toolkit.agentic.thinking_manager.thinking_manager import (
 from unique_toolkit.agentic.tools.a2a import (
     A2AManager,
     ExtendedSubAgentToolConfig,
+    SubAgentDisplaySpec,
     SubAgentEvaluationService,
-    SubAgentResponsesPostprocessor,
-)
-from unique_toolkit.agentic.tools.a2a.postprocessing.postprocessor import (
+    SubAgentEvaluationSpec,
+    SubAgentReferencesPostprocessor,
+    SubAgentResponsesDisplayPostprocessor,
     SubAgentResponsesPostprocessorConfig,
+    SubAgentResponseWatcher,
 )
 from unique_toolkit.agentic.tools.config import ToolBuildConfig
 from unique_toolkit.agentic.tools.mcp.manager import MCPManager
@@ -107,6 +109,7 @@ class _CommonComponents(NamedTuple):
     history_manager: HistoryManager
     evaluation_manager: EvaluationManager
     postprocessor_manager: PostprocessorManager
+    response_watcher: SubAgentResponseWatcher
     # Tool Manager Components
     tool_progress_reporter: ToolProgressReporter
     tool_manager_config: ToolManagerConfig
@@ -126,6 +129,8 @@ def _build_common(
 
     uploaded_documents = content_service.get_documents_uploaded_to_chat()
 
+    response_watcher = SubAgentResponseWatcher()
+
     tool_progress_reporter = ToolProgressReporter(
         chat_service=chat_service,
         config=config.agent.services.tool_progress_reporter_config,
@@ -174,7 +179,9 @@ def _build_common(
     a2a_manager = A2AManager(
         logger=logger,
         tool_progress_reporter=tool_progress_reporter,
+        response_watcher=response_watcher,
     )
+
     tool_manager_config = ToolManagerConfig(
         tools=config.space.tools,
         max_tool_calls=config.agent.experimental.loop_configuration.max_tool_calls_per_iteration,
@@ -222,6 +229,7 @@ def _build_common(
         tool_manager_config=tool_manager_config,
         mcp_servers=event.payload.mcp_servers,
         postprocessor_manager=postprocessor_manager,
+        response_watcher=response_watcher,
     )
 
 
@@ -341,16 +349,15 @@ async def _build_responses(
     _add_sub_agents_postprocessor(
         postprocessor_manager=postprocessor_manager,
         tool_manager=tool_manager,
-        user_id=event.user_id,
-        company_id=event.company_id,
-        chat_id=event.payload.chat_id,
-        sleep_time_before_update=config.agent.experimental.sub_agents_config.sleep_time_before_update,
+        config=config,
+        response_watcher=common_components.response_watcher,
     )
     _add_sub_agents_evaluation(
         evaluation_manager=common_components.evaluation_manager,
         tool_manager=tool_manager,
         config=config,
         event=event,
+        response_watcher=common_components.response_watcher,
     )
 
     return UniqueAIResponsesApi(
@@ -414,16 +421,15 @@ def _build_completions(
     _add_sub_agents_postprocessor(
         postprocessor_manager=postprocessor_manager,
         tool_manager=tool_manager,
-        user_id=event.user_id,
-        company_id=event.company_id,
-        chat_id=event.payload.chat_id,
-        sleep_time_before_update=config.agent.experimental.sub_agents_config.sleep_time_before_update,
+        config=config,
+        response_watcher=common_components.response_watcher,
     )
     _add_sub_agents_evaluation(
         evaluation_manager=common_components.evaluation_manager,
         tool_manager=tool_manager,
         config=config,
         event=event,
+        response_watcher=common_components.response_watcher,
     )
 
     return UniqueAI(
@@ -447,28 +453,37 @@ def _build_completions(
 def _add_sub_agents_postprocessor(
     postprocessor_manager: PostprocessorManager,
     tool_manager: ToolManager | ResponsesApiToolManager,
-    user_id: str,
-    company_id: str,
-    chat_id: str,
-    sleep_time_before_update: float,
+    config: UniqueAIConfig,
+    response_watcher: SubAgentResponseWatcher,
 ) -> None:
     sub_agents = tool_manager.sub_agents
     if len(sub_agents) > 0:
-        sub_agent_responses_postprocessor = SubAgentResponsesPostprocessor(
-            user_id=user_id,
-            main_agent_chat_id=chat_id,
-            company_id=company_id,
-            config=SubAgentResponsesPostprocessorConfig(
-                sleep_time_before_update=sleep_time_before_update,
-            ),
+        display_config = SubAgentResponsesPostprocessorConfig(
+            sleep_time_before_update=config.agent.experimental.sub_agents_config.sleep_time_before_update,
         )
-        postprocessor_manager.add_postprocessor(sub_agent_responses_postprocessor)
-
-        for tool in tool_manager.sub_agents:
-            assert isinstance(tool.config, ExtendedSubAgentToolConfig)
-            sub_agent_responses_postprocessor.register_sub_agent_tool(
-                tool, tool.config.response_display_config
+        display_specs = []
+        for tool in sub_agents:
+            tool_config = cast(
+                ExtendedSubAgentToolConfig, tool.settings.configuration
+            )  # (BeforeValidator of ToolBuildConfig)
+
+            display_specs.append(
+                SubAgentDisplaySpec(
+                    assistant_id=tool_config.assistant_id,
+                    display_name=tool.display_name(),
+                    display_config=tool_config.response_display_config,
+                )
             )
+        reference_postprocessor = SubAgentReferencesPostprocessor(
+            response_watcher=response_watcher,
+        )
+        sub_agent_responses_postprocessor = SubAgentResponsesDisplayPostprocessor(
+            config=display_config,
+            response_watcher=response_watcher,
+            display_specs=display_specs,
+        )
+        postprocessor_manager.add_postprocessor(reference_postprocessor)
+        postprocessor_manager.add_postprocessor(sub_agent_responses_postprocessor)
 
 
 def _add_sub_agents_evaluation(
@@ -476,18 +491,31 @@ def _add_sub_agents_evaluation(
     tool_manager: ToolManager | ResponsesApiToolManager,
     config: UniqueAIConfig,
     event: ChatEvent,
+    response_watcher: SubAgentResponseWatcher,
 ) -> None:
     sub_agents = tool_manager.sub_agents
-    if len(sub_agents) > 0:
-        sub_agent_evaluation = None
-        if config.agent.experimental.sub_agents_config.evaluation_config is not None:
-            sub_agent_evaluation = SubAgentEvaluationService(
-                config=config.agent.experimental.sub_agents_config.evaluation_config,
-                language_model_service=LanguageModelService.from_event(event),
-            )
-            evaluation_manager.add_evaluation(sub_agent_evaluation)
-        for tool in tool_manager.sub_agents:
-            assert isinstance(tool.config, ExtendedSubAgentToolConfig)
-            sub_agent_evaluation.register_sub_agent_tool(
-                tool, tool.config.evaluation_config
+    if (
+        len(sub_agents) > 0
+        and config.agent.experimental.sub_agents_config.evaluation_config is not None
+    ):
+        evaluation_specs = []
+        for tool in sub_agents:
+            tool_config = cast(
+                ExtendedSubAgentToolConfig, tool.settings.configuration
+            )  # (BeforeValidator of ToolBuildConfig)
+
+            evaluation_specs.append(
+                SubAgentEvaluationSpec(
+                    assistant_id=tool_config.assistant_id,
+                    display_name=tool.display_name(),
+                    config=tool_config.evaluation_config,
                 )
+            )
+
+        sub_agent_evaluation = SubAgentEvaluationService(
+            config=config.agent.experimental.sub_agents_config.evaluation_config,
+            language_model_service=LanguageModelService.from_event(event),
+            evaluation_specs=evaluation_specs,
+            response_watcher=response_watcher,
+        )
+        evaluation_manager.add_evaluation(sub_agent_evaluation)
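
Taken together, the 1.7.4 refactor swaps imperative per-tool registration (`register_sub_agent_tool`) for declarative specs plus a shared `SubAgentResponseWatcher` that display postprocessing, reference postprocessing, and evaluation all consume. A rough sketch of that pattern with deliberately hypothetical stand-in classes; the real types live in `unique_toolkit.agentic.tools.a2a` and their interfaces are not shown in this diff:

from dataclasses import dataclass, field


@dataclass
class ResponseWatcher:
    """Hypothetical stand-in: collects sub-agent responses as the tool loop runs."""

    responses: list[tuple[str, str]] = field(default_factory=list)  # (assistant_id, text)

    def record(self, assistant_id: str, text: str) -> None:
        self.responses.append((assistant_id, text))


@dataclass
class DisplaySpec:
    """Hypothetical stand-in for SubAgentDisplaySpec: static config, declared up front."""

    assistant_id: str
    display_name: str


class DisplayPostprocessor:
    """Consumers share one watcher instead of having each tool registered on them."""

    def __init__(self, watcher: ResponseWatcher, specs: list[DisplaySpec]) -> None:
        self._watcher = watcher
        self._specs = {s.assistant_id: s for s in specs}

    def render(self) -> list[str]:
        # Read everything the watcher observed, keyed back to its spec.
        return [
            f"{self._specs[aid].display_name}: {text}"
            for aid, text in self._watcher.responses
            if aid in self._specs
        ]


watcher = ResponseWatcher()
post = DisplayPostprocessor(watcher, [DisplaySpec("a1", "Research Agent")])
watcher.record("a1", "quarterly summary ready")
print(post.render())  # ['Research Agent: quarterly summary ready']

One plausible motivation for this shape, consistent with what the diff shows: the specs are plain data built once from tool configs, and the watcher decouples producers (sub-agent tool calls) from the several consumers added in this release.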