PyPI - unique_toolkit - Versions diffs - 1.7.0__tar.gz → 1.8.1__tar.gz - Mend

unique_toolkit 1.7.0__tar.gz → 1.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (156) hide show

{unique_toolkit-1.7.0 → unique_toolkit-1.8.1}/CHANGELOG.md RENAMED Viewed

@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [1.8.1] - 2026-10-03
+- Fix bug where sub agent evaluation config variable `include_evaluation` did not include aliases for previous names.
+## [1.8.0] - 2026-10-03
+- Sub Agents now block when executing the same sub-agent multiple times with `reuse_chat` set to `True`.
+- Sub Agents tool, evaluation and post-processing refactored and tests added.
 ## [1.7.0] - 2025-10-01
 - Add functionality to remove text in `get_user_visible_chat_history`

{unique_toolkit-1.7.0 → unique_toolkit-1.8.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unique_toolkit
-Version: 1.7.0
+Version: 1.8.1
 Summary:
 License: Proprietary
 Author: Cedric Klinkert
@@ -118,6 +118,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [1.8.1] - 2026-10-03
+- Fix bug where sub agent evaluation config variable `include_evaluation` did not include aliases for previous names.
+## [1.8.0] - 2026-10-03
+- Sub Agents now block when executing the same sub-agent multiple times with `reuse_chat` set to `True`.
+- Sub Agents tool, evaluation and post-processing refactored and tests added.
 ## [1.7.0] - 2025-10-01
 - Add functionality to remove text in `get_user_visible_chat_history`

{unique_toolkit-1.7.0 → unique_toolkit-1.8.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "unique_toolkit"
-version = "1.7.0"
+version = "1.8.1"
 description = ""
 authors = [
     "Cedric Klinkert <cedric.klinkert@unique.ch>",

unique_toolkit-1.8.1/unique_toolkit/agentic/tools/a2a/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+from unique_toolkit.agentic.tools.a2a.config import ExtendedSubAgentToolConfig
+from unique_toolkit.agentic.tools.a2a.evaluation import (
+    SubAgentEvaluationService,
+    SubAgentEvaluationServiceConfig,
+)
+from unique_toolkit.agentic.tools.a2a.manager import A2AManager
+from unique_toolkit.agentic.tools.a2a.postprocessing import (
+    SubAgentResponsesPostprocessor,
+)
+from unique_toolkit.agentic.tools.a2a.tool import SubAgentTool, SubAgentToolConfig
+__all__ = [
+    "SubAgentToolConfig",
+    "SubAgentTool",
+    "SubAgentResponsesPostprocessor",
+    "A2AManager",
+    "ExtendedSubAgentToolConfig",
+    "SubAgentEvaluationServiceConfig",
+    "SubAgentEvaluationService",
+]

unique_toolkit-1.8.1/unique_toolkit/agentic/tools/a2a/config.py ADDED Viewed

@@ -0,0 +1,17 @@
+from pydantic import Field
+from unique_toolkit.agentic.tools.a2a.evaluation import SubAgentEvaluationConfig
+from unique_toolkit.agentic.tools.a2a.postprocessing import SubAgentDisplayConfig
+from unique_toolkit.agentic.tools.a2a.tool import SubAgentToolConfig
+# SubAgentToolConfig with display and evaluation configs
+class ExtendedSubAgentToolConfig(SubAgentToolConfig):
+    response_display_config: SubAgentDisplayConfig = Field(
+        default_factory=SubAgentDisplayConfig,
+        description="Configuration for how to display the sub-agent response.",
+    )
+    evaluation_config: SubAgentEvaluationConfig = Field(
+        default_factory=SubAgentEvaluationConfig,
+        description="Configuration for handling assessments of the sub-agent response.",
+    )

unique_toolkit-1.8.1/unique_toolkit/agentic/tools/a2a/evaluation/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+from unique_toolkit.agentic.tools.a2a.evaluation.config import (
+    SubAgentEvaluationConfig,
+    SubAgentEvaluationServiceConfig,
+)
+from unique_toolkit.agentic.tools.a2a.evaluation.evaluator import (
+    SubAgentEvaluationService,
+)
+__all__ = [
+    "SubAgentEvaluationService",
+    "SubAgentEvaluationServiceConfig",
+    "SubAgentEvaluationConfig",
+]

unique_toolkit-1.8.1/unique_toolkit/agentic/tools/a2a/evaluation/_utils.py ADDED Viewed

@@ -0,0 +1,66 @@
+import logging
+import unique_sdk
+from unique_toolkit.chat.schemas import (
+    ChatMessageAssessmentLabel,
+    ChatMessageAssessmentStatus,
+)
+logger = logging.getLogger(__name__)
+_ASSESSMENT_LABEL_COMPARISON_DICT: dict[str, int] = {
+    ChatMessageAssessmentLabel.RED: 0,
+    ChatMessageAssessmentLabel.YELLOW: 1,
+    ChatMessageAssessmentLabel.GREEN: 2,
+}
+def _sort_assessments(
+    assessments: list[unique_sdk.Space.Assessment],
+) -> list[unique_sdk.Space.Assessment]:
+    return sorted(
+        assessments,
+        key=lambda x: _ASSESSMENT_LABEL_COMPARISON_DICT[x["label"]],  # type: ignore (should be checked before sorting)
+    )
+def _worst_label(
+    *labels: str,
+) -> str:
+    return min(
+        labels,
+        key=lambda x: _ASSESSMENT_LABEL_COMPARISON_DICT[x],
+    )
+def _get_valid_assessments(
+    assessments: list[unique_sdk.Space.Assessment],
+    display_name: str,
+    sequence_number: int,
+) -> list[unique_sdk.Space.Assessment]:
+    valid_assessments = []
+    for assessment in assessments:
+        if (
+            assessment["label"] is None
+            or assessment["label"] not in ChatMessageAssessmentLabel
+        ):
+            logger.warning(
+                "Unkown assistant label %s for assistant %s (sequence number: %s) will be ignored",
+                assessment["label"],
+                display_name,
+                sequence_number,
+            )
+            continue
+        if assessment["status"] != ChatMessageAssessmentStatus.DONE:
+            logger.warning(
+                "Assessment %s for assistant %s (sequence number: %s) is not done (status: %s) will be ignored",
+                assessment["label"],
+                display_name,
+                sequence_number,
+                assessment["status"],
+            )
+            continue
+        valid_assessments.append(assessment)
+    return valid_assessments

{unique_toolkit-1.7.0 → unique_toolkit-1.8.1}/unique_toolkit/agentic/tools/a2a/evaluation/config.py RENAMED Viewed

@@ -1,8 +1,9 @@
 from pathlib import Path
-from pydantic import BaseModel, Field
+from pydantic import AliasChoices, BaseModel, Field
 from unique_toolkit._common.default_language_model import DEFAULT_GPT_4o
+from unique_toolkit._common.pydantic_helpers import get_configuration_dict
 from unique_toolkit._common.validators import LMI, get_LMI_default_field
 from unique_toolkit.chat.schemas import (
     ChatMessageAssessmentType,
@@ -22,12 +23,13 @@ with open(Path(__file__).parent / "summarization_user_message.j2", "r") as file:
     DEFAULT_SUMMARIZATION_USER_MESSAGE_TEMPLATE = file.read().strip()
-class SubAgentEvaluationConfig(BaseModel):
+class SubAgentEvaluationServiceConfig(BaseModel):
+    model_config = get_configuration_dict()
     assessment_type: ChatMessageAssessmentType = Field(
         default=ChatMessageAssessmentType.COMPLIANCE,
         description="The type of assessment to use in the display.",
     )
     summarization_model: LMI = get_LMI_default_field(DEFAULT_GPT_4o)
     summarization_system_message: str = Field(
         default=DEFAULT_EVALUATION_SYSTEM_MESSAGE_TEMPLATE,
@@ -37,3 +39,17 @@ class SubAgentEvaluationConfig(BaseModel):
         default=DEFAULT_SUMMARIZATION_USER_MESSAGE_TEMPLATE,
         description="The user message template for the summarization model.",
     )
+class SubAgentEvaluationConfig(BaseModel):
+    model_config = get_configuration_dict()
+    include_evaluation: bool = Field(
+        default=True,
+        description="Whether to include the evaluation in the response.",
+        validation_alias=AliasChoices(
+            "includeEvaluation",
+            "displayEvalution",  # typo in old config name
+            "display_evalution",
+        ),
+    )

{unique_toolkit-1.7.0 → unique_toolkit-1.8.1}/unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py RENAMED Viewed

@@ -1,5 +1,4 @@
 import logging
-from collections import defaultdict
 from typing import override
 import unique_sdk
@@ -12,8 +11,16 @@ from unique_toolkit.agentic.evaluation.schemas import (
     EvaluationMetricName,
     EvaluationMetricResult,
 )
-from unique_toolkit.agentic.tools.a2a.evaluation.config import SubAgentEvaluationConfig
-from unique_toolkit.agentic.tools.a2a.service import SubAgentTool
+from unique_toolkit.agentic.tools.a2a.evaluation._utils import (
+    _get_valid_assessments,
+    _sort_assessments,
+    _worst_label,
+)
+from unique_toolkit.agentic.tools.a2a.evaluation.config import (
+    SubAgentEvaluationConfig,
+    SubAgentEvaluationServiceConfig,
+)
+from unique_toolkit.agentic.tools.a2a.tool import SubAgentTool
 from unique_toolkit.chat.schemas import (
     ChatMessageAssessmentLabel,
     ChatMessageAssessmentStatus,
@@ -27,20 +34,19 @@ logger = logging.getLogger(__name__)
 class _SubAgentToolInfo(TypedDict):
-    assessments: list[list[unique_sdk.Space.Assessment]]
+    assessments: dict[int, list[unique_sdk.Space.Assessment]]
     display_name: str
 NO_ASSESSMENTS_FOUND = "NO_ASSESSMENTS_FOUND"
-class SubAgentsEvaluation(Evaluation):
+class SubAgentEvaluationService(Evaluation):
     DISPLAY_NAME = "Sub Agents"
     def __init__(
         self,
-        config: SubAgentEvaluationConfig,
-        sub_agent_tools: list[SubAgentTool],
+        config: SubAgentEvaluationServiceConfig,
         language_model_service: LanguageModelService,
     ):
         super().__init__(EvaluationMetricName.SUB_AGENT)
@@ -49,14 +55,6 @@ class SubAgentsEvaluation(Evaluation):
         self._assistant_id_to_tool_info: dict[str, _SubAgentToolInfo] = {}
         self._language_model_service = language_model_service
-        for sub_agent_tool in sub_agent_tools:
-            if sub_agent_tool.config.evaluation_config.display_evalution:
-                sub_agent_tool.subscribe(self)
-                self._assistant_id_to_tool_info[sub_agent_tool.config.assistant_id] = {
-                    "assessments": [],
-                    "display_name": sub_agent_tool.display_name(),
-                }
     @override
     def get_assessment_type(self) -> ChatMessageAssessmentType:
         return self._config.assessment_type
@@ -71,102 +69,46 @@ class SubAgentsEvaluation(Evaluation):
         value = ChatMessageAssessmentLabel.GREEN
-        # Use a dict in order to compare labels (RED being the worst)
-        label_comparison_dict = defaultdict(
-            lambda: 3
-        )  # Unkown labels are highest in the sorting
-        label_comparison_dict[ChatMessageAssessmentLabel.GREEN] = 2
-        label_comparison_dict[ChatMessageAssessmentLabel.YELLOW] = 1
-        label_comparison_dict[ChatMessageAssessmentLabel.RED] = 0
-        for assistant_id, tool_info in self._assistant_id_to_tool_info.items():
+        for tool_info in self._assistant_id_to_tool_info.values():
             sub_agent_assessments = tool_info["assessments"] or []
-            for i, assessments in enumerate(sub_agent_assessments, start=1):
-                valid_assessments = []
-                for assessment in assessments:
-                    if (
-                        assessment["label"] is None
-                        or assessment["label"] not in ChatMessageAssessmentLabel
-                    ):
-                        logger.warning(
-                            "Unkown assistant label %s for assistant %s will be ignored",
-                            assessment["label"],
-                            assistant_id,
-                        )
-                        continue
-                    if assessment["status"] != ChatMessageAssessmentStatus.DONE:
-                        logger.warning(
-                            "Assessment %s for assistant %s is not done (status: %s) will be ignored",
-                            assessment["label"],
-                            assistant_id,
-                        )
-                        continue
-                    valid_assessments.append(assessment)
+            display_name = tool_info["display_name"]
+            for sequence_number in sorted(sub_agent_assessments):
+                assessments = sub_agent_assessments[sequence_number]
+                valid_assessments = _get_valid_assessments(
+                    assessments, display_name, sequence_number
+                )
                 if len(valid_assessments) == 0:
                     logger.info(
-                        "No valid assessment found for assistant %s", assistant_id
+                        "No valid assessment found for assistant %s (sequence number: %s)",
+                        display_name,
+                        sequence_number,
                     )
                     continue
-                assessments = sorted(
-                    valid_assessments, key=lambda x: label_comparison_dict[x["label"]]
-                )
+                assessments = _sort_assessments(valid_assessments)
+                value = _worst_label(value, assessments[0]["label"])  # type: ignore
-                for assessment in assessments:
-                    value = min(
-                        value,
-                        assessment["label"],
-                        key=lambda x: label_comparison_dict[x],
-                    )
                 data = {
                     "name": tool_info["display_name"],
                     "assessments": assessments,
                 }
                 if len(sub_agent_assessments) > 1:
-                    data["name"] += f" {i}"
+                    data["name"] += f" {sequence_number}"
                 sub_agents_display_data.append(data)
         if len(sub_agents_display_data) == 0:
             logger.warning("No valid sub agent assessments found")
             return EvaluationMetricResult(
                 name=self.get_name(),
                 value=NO_ASSESSMENTS_FOUND,
                 reason="No sub agents assessments found",
             )
-        should_summarize = False
-        reason = ""
-        if len(sub_agents_display_data) > 1:
-            should_summarize = True
-        elif len(sub_agents_display_data) == 1:
-            if len(sub_agents_display_data[0]["assessments"]) > 1:
-                should_summarize = True
-            else:
-                reason = (
-                    sub_agents_display_data[0]["assessments"][0]["explanation"] or ""
-                )
-        if should_summarize:
-            messages = (
-                MessagesBuilder()
-                .system_message_append(self._config.summarization_system_message)
-                .user_message_append(
-                    Template(self._config.summarization_user_message_template).render(
-                        sub_agents=sub_agents_display_data,
-                    )
-                )
-                .build()
-            )
-            reason = await self._language_model_service.complete_async(
-                messages=messages,
-                model_name=self._config.summarization_model.name,
-                temperature=0.0,
-            )
-            reason = str(reason.choices[0].message.content)
+        reason = await self._get_reason(sub_agents_display_data)
         return EvaluationMetricResult(
             name=self.get_name(),
@@ -196,8 +138,30 @@ class SubAgentsEvaluation(Evaluation):
             type=self.get_assessment_type(),
         )
+    def register_sub_agent_tool(
+        self, tool: SubAgentTool, evaluation_config: SubAgentEvaluationConfig
+    ) -> None:
+        if not evaluation_config.include_evaluation:
+            logger.warning(
+                "Sub agent tool %s has evaluation config `include_evaluation` set to False, responses will be ignored.",
+                tool.config.assistant_id,
+            )
+            return
+        if tool.config.assistant_id not in self._assistant_id_to_tool_info:
+            tool.subscribe(self)
+            self._assistant_id_to_tool_info[tool.config.assistant_id] = (
+                _SubAgentToolInfo(
+                    display_name=tool.display_name(),
+                    assessments={},
+                )
+            )
     def notify_sub_agent_response(
-        self, sub_agent_assistant_id: str, response: unique_sdk.Space.Message
+        self,
+        response: unique_sdk.Space.Message,
+        sub_agent_assistant_id: str,
+        sequence_number: int,
     ) -> None:
         if sub_agent_assistant_id not in self._assistant_id_to_tool_info:
             logger.warning(
@@ -206,10 +170,39 @@ class SubAgentsEvaluation(Evaluation):
             )
             return
-        self._assistant_id_to_tool_info[sub_agent_assistant_id]["assessments"].append(
+        sub_agent_assessments = self._assistant_id_to_tool_info[sub_agent_assistant_id][
+            "assessments"
+        ]
+        sub_agent_assessments[sequence_number] = (
             response[
                 "assessment"
             ].copy()  # Shallow copy as we don't modify individual assessments
             if response["assessment"] is not None
             else []
         )
+    async def _get_reason(self, sub_agents_display_data: list[dict]) -> str:
+        if (
+            len(sub_agents_display_data) == 1
+            and len(sub_agents_display_data[0]["assessments"]) == 1
+        ):
+            return sub_agents_display_data[0]["assessments"][0]["explanation"] or ""
+        messages = (
+            MessagesBuilder()
+            .system_message_append(self._config.summarization_system_message)
+            .user_message_append(
+                Template(self._config.summarization_user_message_template).render(
+                    sub_agents=sub_agents_display_data,
+                )
+            )
+            .build()
+        )
+        reason = await self._language_model_service.complete_async(
+            messages=messages,
+            model_name=self._config.summarization_model.name,
+            temperature=0.0,
+        )
+        return str(reason.choices[0].message.content)

{unique_toolkit-1.7.0 → unique_toolkit-1.8.1}/unique_toolkit/agentic/tools/a2a/manager.py RENAMED Viewed

@@ -1,8 +1,8 @@
 from logging import Logger
-from unique_toolkit.agentic.tools.a2a.config import SubAgentToolConfig
-from unique_toolkit.agentic.tools.a2a.service import SubAgentTool, ToolProgressReporter
+from unique_toolkit.agentic.tools.a2a.tool import SubAgentTool, SubAgentToolConfig
 from unique_toolkit.agentic.tools.config import ToolBuildConfig
+from unique_toolkit.agentic.tools.tool_progress_reporter import ToolProgressReporter
 from unique_toolkit.app.schemas import ChatEvent

unique_toolkit-1.8.1/unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+from unique_toolkit.agentic.tools.a2a.postprocessing.config import (
+    SubAgentDisplayConfig,
+    SubAgentResponseDisplayMode,
+)
+from unique_toolkit.agentic.tools.a2a.postprocessing.postprocessor import (
+    SubAgentResponsesPostprocessor,
+)
+__all__ = [
+    "SubAgentResponsesPostprocessor",
+    "SubAgentResponseDisplayMode",
+    "SubAgentDisplayConfig",
+]

unique_toolkit-1.7.0/unique_toolkit/agentic/tools/a2a/postprocessing/display.py → unique_toolkit-1.8.1/unique_toolkit/agentic/tools/a2a/postprocessing/_display.py RENAMED Viewed

@@ -2,7 +2,9 @@ import re
 from abc import ABC, abstractmethod
 from typing import Literal, override
-from unique_toolkit.agentic.tools.a2a.config import ResponseDisplayMode
+from unique_toolkit.agentic.tools.a2a.postprocessing.config import (
+    SubAgentResponseDisplayMode,
+)
 class _ResponseDisplayHandler(ABC):
@@ -84,13 +86,20 @@ class _DetailsResponseDisplayHandler(_ResponseDisplayHandler):
 _DISPLAY_HANDLERS = {
-    ResponseDisplayMode.DETAILS_OPEN: _DetailsResponseDisplayHandler(mode="open"),
-    ResponseDisplayMode.DETAILS_CLOSED: _DetailsResponseDisplayHandler(mode="closed"),
+    SubAgentResponseDisplayMode.DETAILS_OPEN: _DetailsResponseDisplayHandler(
+        mode="open"
+    ),
+    SubAgentResponseDisplayMode.DETAILS_CLOSED: _DetailsResponseDisplayHandler(
+        mode="closed"
+    ),
 }
-def build_sub_agent_answer_display(
-    display_name: str, display_mode: ResponseDisplayMode, answer: str, assistant_id: str
+def _build_sub_agent_answer_display(
+    display_name: str,
+    display_mode: SubAgentResponseDisplayMode,
+    answer: str,
+    assistant_id: str,
 ) -> str:
     if display_mode not in _DISPLAY_HANDLERS:
         return ""
@@ -102,8 +111,8 @@ def build_sub_agent_answer_display(
     )
-def remove_sub_agent_answer_from_text(
-    display_mode: ResponseDisplayMode, text: str, assistant_id: str
+def _remove_sub_agent_answer_from_text(
+    display_mode: SubAgentResponseDisplayMode, text: str, assistant_id: str
 ) -> str:
     if display_mode not in _DISPLAY_HANDLERS:
         return text

unique_toolkit-1.8.1/unique_toolkit/agentic/tools/a2a/postprocessing/_utils.py ADDED Viewed

@@ -0,0 +1,19 @@
+def _replace_references_in_text_non_overlapping(
+    text: str, ref_map: dict[int, int]
+) -> str:
+    for orig, repl in ref_map.items():
+        text = text.replace(f"<sup>{orig}</sup>", f"<sup>{repl}</sup>")
+    return text
+def _replace_references_in_text(text: str, ref_map: dict[int, int]) -> str:
+    # 2 phase replacement, since the map keys and values can overlap
+    max_ref = max(max(ref_map.keys(), default=0), max(ref_map.values(), default=0)) + 1
+    unique_refs = range(max_ref, max_ref + len(ref_map))
+    text = _replace_references_in_text_non_overlapping(
+        text, dict(zip(ref_map.keys(), unique_refs))
+    )
+    return _replace_references_in_text_non_overlapping(
+        text, dict(zip(unique_refs, ref_map.values()))
+    )

unique_toolkit-1.8.1/unique_toolkit/agentic/tools/a2a/postprocessing/config.py ADDED Viewed

@@ -0,0 +1,24 @@
+from enum import StrEnum
+from pydantic import BaseModel, Field
+from unique_toolkit._common.pydantic_helpers import get_configuration_dict
+class SubAgentResponseDisplayMode(StrEnum):
+    HIDDEN = "hidden"
+    DETAILS_OPEN = "details_open"
+    DETAILS_CLOSED = "details_closed"
+class SubAgentDisplayConfig(BaseModel):
+    model_config = get_configuration_dict()
+    mode: SubAgentResponseDisplayMode = Field(
+        default=SubAgentResponseDisplayMode.HIDDEN,
+        description="Controls how to display the sub agent response.",
+    )
+    remove_from_history: bool = Field(
+        default=True,
+        description="If set, sub agent responses will be removed from the history on subsequent calls to the assistant.",
+    )

unique_toolkit 1.7.0__tar.gz → 1.8.1__tar.gz

unique_toolkit 1.7.0__tar.gz → 1.8.1__tar.gz