PyPI - unique_toolkit - Versions diffs - 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl - Mend

unique_toolkit 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

unique_toolkit/app/schemas.py +3 -0
unique_toolkit/language_model/infos.py +76 -0
unique_toolkit/language_model/schemas.py +18 -0
unique_toolkit/reference_manager/reference_manager.py +72 -0
unique_toolkit/tools/agent_chunks_handler.py +62 -0
unique_toolkit/tools/config.py +108 -0
unique_toolkit/tools/{tool_factory.py → factory.py} +15 -5
unique_toolkit/tools/schemas.py +138 -0
unique_toolkit/tools/test/test_tool_progress_reporter.py +204 -0
unique_toolkit/tools/tool.py +168 -0
unique_toolkit/tools/tool_manager.py +242 -0
unique_toolkit/tools/tool_progress_reporter.py +4 -11
unique_toolkit/tools/utils/execution/execution.py +282 -0
unique_toolkit/tools/utils/source_handling/schema.py +22 -0
unique_toolkit/tools/utils/source_handling/source_formatting.py +207 -0
unique_toolkit/tools/utils/source_handling/tests/test_source_formatting.py +215 -0
{unique_toolkit-0.8.4.dist-info → unique_toolkit-0.8.6.dist-info}/METADATA +7 -1
{unique_toolkit-0.8.4.dist-info → unique_toolkit-0.8.6.dist-info}/RECORD +20 -11
unique_toolkit/tools/tool_definitions.py +0 -145
unique_toolkit/tools/tool_definitionsV2.py +0 -137
{unique_toolkit-0.8.4.dist-info → unique_toolkit-0.8.6.dist-info}/LICENSE +0 -0
{unique_toolkit-0.8.4.dist-info → unique_toolkit-0.8.6.dist-info}/WHEEL +0 -0

unique_toolkit/app/schemas.py CHANGED Viewed

@@ -51,6 +51,7 @@ class BaseEvent(BaseModel):
 # MCP schemas
 ###
 class McpTool(BaseModel):
     model_config = model_config
@@ -79,6 +80,7 @@ class McpTool(BaseModel):
         description="Whether the tool is connected to the MCP server. This is a Unique specific field.",
     )
 class McpServer(BaseModel):
     model_config = model_config
@@ -94,6 +96,7 @@ class McpServer(BaseModel):
     )
     tools: list[McpTool] = []
 ###
 # ChatEvent schemas
 ###

unique_toolkit/language_model/infos.py CHANGED Viewed

@@ -14,6 +14,10 @@ class LanguageModelName(StrEnum):
     AZURE_GPT_4_0613 = "AZURE_GPT_4_0613"
     AZURE_GPT_4_32K_0613 = "AZURE_GPT_4_32K_0613"
     AZURE_GPT_4_TURBO_2024_0409 = "AZURE_GPT_4_TURBO_2024_0409"
+    AZURE_GPT_5_2025_0807 = "AZURE_GPT_5_2025_0807"
+    AZURE_GPT_5_MINI_2025_0807 = "AZURE_GPT_5_MINI_2025_0807"
+    AZURE_GPT_5_NANO_2025_0807 = "AZURE_GPT_5_NANO_2025_0807"
+    AZURE_GPT_5_CHAT_2025_0807 = "AZURE_GPT_5_CHAT_2025_0807"
     AZURE_GPT_4o_2024_0513 = "AZURE_GPT_4o_2024_0513"
     AZURE_GPT_4o_2024_0806 = "AZURE_GPT_4o_2024_0806"
     AZURE_GPT_4o_2024_1120 = "AZURE_GPT_4o_2024_1120"
@@ -63,6 +67,10 @@ def get_encoder_name(model_name: LanguageModelName) -> EncoderName:
             | LMN.AZURE_GPT_4o_2024_0806
             | LMN.AZURE_GPT_4o_MINI_2024_0718
             | LMN.AZURE_GPT_4o_2024_1120
+            | LMN.AZURE_GPT_5_2025_0807
+            | LMN.AZURE_GPT_5_MINI_2025_0807
+            | LMN.AZURE_GPT_5_NANO_2025_0807
+            | LMN.AZURE_GPT_5_CHAT_2025_0807
         ):
             return EncoderName.O200K_BASE
         case _:
@@ -161,6 +169,74 @@ class LanguageModelInfo(BaseModel):
                     deprecated_at=date(2024, 10, 1),
                     retirement_at=date(2025, 6, 6),
                 )
+            case LanguageModelName.AZURE_GPT_5_2025_0807:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.AZURE,
+                    version="2025-08-07",
+                    encoder_name=EncoderName.O200K_BASE,
+                    capabilities=[
+                        ModelCapabilities.FUNCTION_CALLING,
+                        ModelCapabilities.STREAMING,
+                        ModelCapabilities.REASONING,
+                        ModelCapabilities.VISION,
+                        ModelCapabilities.STRUCTURED_OUTPUT,
+                        ModelCapabilities.PARALLEL_FUNCTION_CALLING,
+                    ],
+                    token_limits=LanguageModelTokenLimits(token_limit_input=272000, token_limit_output=128000),
+                    info_cutoff_at=date(2024, 10, 24),
+                    published_at=date(2025, 8, 7),
+                    deprecated_at=date(2026, 8, 7),
+                    retirement_at=date(2026, 8, 7),
+                )
+            case LanguageModelName.AZURE_GPT_5_MINI_2025_0807:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.AZURE,
+                    version="2025-08-07",
+                    encoder_name=EncoderName.O200K_BASE,
+                    capabilities=[
+                        ModelCapabilities.FUNCTION_CALLING,
+                        ModelCapabilities.STREAMING,
+                        ModelCapabilities.VISION,
+                        ModelCapabilities.STRUCTURED_OUTPUT,
+                    ],
+                    token_limits=LanguageModelTokenLimits(token_limit_input=272000, token_limit_output=128000),
+                    info_cutoff_at=date(2024, 6, 24),
+                    published_at=date(2025, 8, 7),
+                    deprecated_at=date(2026, 8, 7),
+                    retirement_at=date(2026, 8, 7),
+                )
+            case LanguageModelName.AZURE_GPT_5_NANO_2025_0807:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.AZURE,
+                    version="2025-08-07",
+                    encoder_name=EncoderName.O200K_BASE,
+                    capabilities=[
+                        ModelCapabilities.FUNCTION_CALLING,
+                        ModelCapabilities.STREAMING,
+                        ModelCapabilities.VISION,
+                        ModelCapabilities.STRUCTURED_OUTPUT,
+                    ],
+                    token_limits=LanguageModelTokenLimits(token_limit_input=272000, token_limit_output=128000),
+                    info_cutoff_at=date(2024, 5, 31),
+                    published_at=date(2025, 8, 7),
+                    deprecated_at=date(2026, 8, 7),
+                    retirement_at=date(2026, 8, 7),
+                )
+            case LanguageModelName.AZURE_GPT_5_CHAT_2025_0807:
+                return cls(
+                    name=model_name,
+                    provider=LanguageModelProvider.AZURE,
+                    version="2025-08-07",
+                    encoder_name=EncoderName.O200K_BASE,
+                    token_limits=LanguageModelTokenLimits(token_limit_input=128000, token_limit_output=16384),
+                    info_cutoff_at=date(2024, 10, 24),
+                    published_at=date(2025, 8, 7),
+                    deprecated_at=date(2026, 8, 7),
+                    retirement_at=date(2026, 8, 7),
+                )
             case LanguageModelName.AZURE_GPT_4_TURBO_2024_0409:
                 return cls(
                     name=model_name,

unique_toolkit/language_model/schemas.py CHANGED Viewed

@@ -86,6 +86,24 @@ class LanguageModelFunction(BaseModel):
         return seralization
+    def __eq__(self, other: object) -> bool:
+        """
+        Compare two tool calls based on id, name and arguments.
+
+        Args:
+            other: Object to compare against; may be of any type.
+
+        Returns:
+            True only if `other` is a LanguageModelFunction whose `id`,
+            `name` and `arguments` all equal this instance's; False otherwise
+            (including when `other` is of a different type).
+        """
+        if not isinstance(other, LanguageModelFunction):
+            return False
+        return (
+            self.id == other.id
+            and self.name == other.name
+            and self.arguments == other.arguments
+        )
 # This is tailored to the unique backend
 class LanguageModelStreamResponse(BaseModel):
     model_config = model_config

unique_toolkit/reference_manager/reference_manager.py ADDED Viewed

@@ -0,0 +1,72 @@
+from unique_toolkit.content.schemas import ContentChunk, ContentReference
+from unique_toolkit.tools.schemas import ToolCallResponse
+class tool_chunks:
+    """Pairs a tool's name with the list of chunks recorded for it."""
+    def __init__(self, name: str, chunks: list) -> None:
+        # Both values are stored as given; no copy of `chunks` is made.
+        self.name, self.chunks = name, chunks
+class ReferenceManager:
+    """Collects chunks returned by tool calls and the reference lists that cite them."""
+    def __init__(self):
+        # Keyed by tool response id: the tool's name plus the chunks it returned.
+        self._tool_chunks: dict[str, tool_chunks] = {}
+        # Flat list of every chunk gathered so far, in the order it was added.
+        self._chunks: list[ContentChunk] = []
+        # One entry per add_references() call; the last entry is the latest one.
+        self._references: list[list[ContentReference]] = []
+    def extract_referenceable_chunks(
+        self, tool_responses: list[ToolCallResponse]
+    ) -> None:
+        """Record the chunks of every tool response that carries at least one chunk."""
+        for response in tool_responses:
+            found_chunks = response.content_chunks
+            if found_chunks:
+                self._chunks.extend(found_chunks)
+                self._tool_chunks[response.id] = tool_chunks(
+                    response.name, found_chunks
+                )
+    def get_chunks(self) -> list[ContentChunk]:
+        """Return the internal list of all collected chunks (not a copy)."""
+        return self._chunks
+    def get_tool_chunks(self) -> dict:
+        """Return the internal mapping from tool response id to its recorded chunks."""
+        return self._tool_chunks
+    def replace(self, chunks: list[ContentChunk]):
+        """Swap the collected chunk list for `chunks`; the per-tool mapping is left as is."""
+        self._chunks = chunks
+    def add_references(
+        self,
+        references: list[ContentReference],
+    ):
+        """Append one list of references as the newest entry."""
+        self._references.append(references)
+    def get_references(
+        self,
+    ) -> list[list[ContentReference]]:
+        """Return every reference list added so far, oldest first."""
+        return self._references
+    def get_latest_references(
+        self,
+    ) -> list[ContentReference]:
+        """Return the most recently added reference list, or [] if none was added."""
+        return self._references[-1] if self._references else []
+    def get_latest_referenced_chunks(self) -> list[ContentChunk]:
+        """Return the collected chunks cited by the most recently added reference list."""
+        if self._references:
+            return self._get_referenced_chunks_from_references(self._references[-1])
+        return []
+    def _get_referenced_chunks_from_references(
+        self,
+        references: list[ContentReference],
+    ) -> list[ContentChunk]:
+        """
+        Return the chunks whose "<id>-<chunk_id>" key equals the source_id of a reference.
+        Results follow the order of `references`; within one reference they follow
+        the order of the collected chunks.
+        """
+        # NOTE(review): AgentChunksHandler in tools/agent_chunks_handler.py builds the
+        # same key with "_" instead of "-" - confirm which format source_id really uses.
+        return [
+            chunk
+            for ref in references
+            for chunk in self._chunks
+            if ref.source_id == f"{chunk.id}-{chunk.chunk_id}"
+        ]

unique_toolkit/tools/agent_chunks_handler.py ADDED Viewed

@@ -0,0 +1,62 @@
+from unique_toolkit.content.schemas import ContentChunk, ContentReference
+class AgentChunksHandler:
+    """Holds the chunks available to an agent and the reference lists that cite them."""
+    def __init__(self):
+        # Never written to inside this class; only exposed through `tool_chunks`.
+        self._tool_chunks = {}
+        # Every chunk added through extend()/replace().
+        self._chunks: list[ContentChunk] = []
+        # One entry per add_references() call; the last entry is the latest one.
+        self._references: list[list[ContentReference]] = []
+    @property
+    def chunks(self) -> list[ContentChunk]:
+        """The internal chunk list (not a copy)."""
+        return self._chunks
+    @property
+    def tool_chunks(self) -> dict:
+        """The internal tool-chunks mapping (not a copy)."""
+        return self._tool_chunks
+    def extend(self, chunks: list[ContentChunk]):
+        """Append `chunks` to the chunks already held."""
+        self._chunks.extend(chunks)
+    def replace(self, chunks: list[ContentChunk]):
+        """Discard the held chunks and use `chunks` instead."""
+        self._chunks = chunks
+    def add_references(
+        self,
+        references: list[ContentReference],
+    ):
+        """Append one list of references as the newest entry."""
+        self._references.append(references)
+    @property
+    def all_references(
+        self,
+    ) -> list[list[ContentReference]]:
+        """Every reference list added so far, oldest first."""
+        return self._references
+    @property
+    def latest_references(
+        self,
+    ) -> list[ContentReference]:
+        """The most recently added reference list, or [] if none was added."""
+        return self._references[-1] if self._references else []
+    @property
+    def latest_referenced_chunks(self) -> list[ContentChunk]:
+        """The held chunks cited by the most recently added reference list."""
+        if self._references:
+            return self._get_referenced_chunks_from_references(self._references[-1])
+        return []
+    def _get_referenced_chunks_from_references(
+        self,
+        references: list[ContentReference],
+    ) -> list[ContentChunk]:
+        """
+        Return the chunks whose "<id>_<chunk_id>" key equals the source_id of a reference.
+        Results follow the order of `references`; within one reference they follow
+        the order of the held chunks.
+        """
+        # NOTE(review): ReferenceManager in reference_manager/reference_manager.py builds
+        # the same key with "-" instead of "_" - confirm which format source_id really uses.
+        return [
+            chunk
+            for ref in references
+            for chunk in self._chunks
+            if ref.source_id == str(chunk.id) + "_" + str(chunk.chunk_id)
+        ]

unique_toolkit/tools/config.py ADDED Viewed

@@ -0,0 +1,108 @@
+from enum import StrEnum
+import humps
+from typing import Any
+from pydantic.fields import ComputedFieldInfo, FieldInfo
+from pydantic.alias_generators import to_camel
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    ValidationInfo,
+    model_validator,
+)
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from unique_toolkit.tools.schemas import BaseToolConfig
+def field_title_generator(
+    title: str,
+    info: FieldInfo | ComputedFieldInfo,
+) -> str:
+    """Build a display title from a field name.
+    The name is decamelized, its underscores become spaces and the result is
+    title-cased. `info` is accepted but not used.
+    """
+    words = humps.decamelize(title).split("_")
+    return " ".join(words).title()
+def model_title_generator(model: type) -> str:
+    """Build a display title from a class name: decamelize, underscores to spaces, title-case."""
+    snake_name = humps.decamelize(model.__name__)
+    spaced_name = snake_name.replace("_", " ")
+    return spaced_name.title()
+def get_configuration_dict(**kwargs) -> ConfigDict:
+    """Return the shared pydantic ConfigDict, extended with any extra settings.
+    The base settings are: camelCase aliases, generated field and model titles,
+    population by field name, and no protected namespaces. `kwargs` are passed
+    through to ConfigDict alongside them, so repeating one of the base keys
+    raises a TypeError for the duplicated keyword.
+    """
+    shared_config = ConfigDict(
+        alias_generator=to_camel,
+        field_title_generator=field_title_generator,
+        model_title_generator=model_title_generator,
+        populate_by_name=True,
+        protected_namespaces=(),
+        **kwargs,
+    )
+    return shared_config
+# Icons a tool can be configured with; ToolBuildConfig.icon defaults to BOOK.
+# Each member's value is an "Icon..." string.
+# NOTE(review): the values look like frontend icon component names - confirm.
+class ToolIcon(StrEnum):
+    ANALYTICS = "IconAnalytics"
+    BOOK = "IconBook"
+    FOLDERDATA = "IconFolderData"
+    INTEGRATION = "IconIntegration"
+    TEXT_COMPARE = "IconTextCompare"
+    WORLD = "IconWorld"
+    QUICK_REPLY = "IconQuickReply"
+    CHAT_PLUS = "IconChatPlus"
+# ToolBuildConfig.selection_policy uses this enum and defaults to BY_USER.
+# NOTE(review): the exact meaning of each policy is not visible in this module;
+# the member names suggest forced / on-unless-disabled / user-chosen - confirm.
+class ToolSelectionPolicy(StrEnum):
+    """Determine the usage policy of tools."""
+    FORCED_BY_DEFAULT = "ForcedByDefault"
+    ON_BY_DEFAULT = "OnByDefault"
+    BY_USER = "ByUser"
+class ToolBuildConfig(BaseModel):
+    # Main tool configuration: the tool's name, its tool-specific configuration
+    # object and the display / selection settings that go with it.
+    # (Kept as a comment rather than a docstring so the model's __doc__, and
+    # anything derived from it, stays exactly as before.)
+    model_config = get_configuration_dict()
+    name: str
+    configuration: "BaseToolConfig"
+    display_name: str = ""
+    icon: ToolIcon = ToolIcon.BOOK
+    selection_policy: ToolSelectionPolicy = Field(
+        default=ToolSelectionPolicy.BY_USER,
+    )
+    is_exclusive: bool = Field(
+        default=False,
+        description="This tool must be chosen by the user and no other tools are used for this iteration.",
+    )
+    is_enabled: bool = Field(default=True)
+    @model_validator(mode="before")
+    def initialize_config_based_on_tool_name(
+        cls,
+        value: Any,
+        info: ValidationInfo,
+    ) -> Any:
+        """Resolve `configuration` into the config type registered for the tool name.
+
+        Args:
+            value: Raw input handed to the model. Anything that is not a dict,
+                or a dict without a "name" key, is returned untouched so that
+                regular field validation reports the problem.
+            info: Validation context supplied by pydantic (unused).
+
+        Returns:
+            A new dict equal to `value` but with "configuration" replaced by a
+            config object: built via ToolFactory.build_tool_config when a dict
+            (or nothing) was given, otherwise the given object after its type
+            has been checked. The caller's dict is not modified.
+
+        Raises:
+            ValueError: If the tool name is not registered in ToolFactory, or
+                if a non-dict configuration is not an instance of the config
+                type registered for that tool name.
+        """
+        if not isinstance(value, dict) or "name" not in value:
+            return value
+        # Local import to avoid circular import at module import time
+        from unique_toolkit.tools.factory import ToolFactory
+        tool_name = value["name"]
+        configuration = value.get("configuration", {})
+        if isinstance(configuration, dict):
+            config = ToolFactory.build_tool_config(tool_name, **configuration)
+        else:
+            # Check that the type of config matches the tool name. Explicit
+            # raises are used instead of `assert`, which is removed under -O.
+            if tool_name not in ToolFactory.tool_config_map:
+                raise ValueError(f"Tool {tool_name} not found")
+            expected_type = ToolFactory.tool_config_map[tool_name]
+            if not isinstance(configuration, expected_type):  # type: ignore
+                raise ValueError(
+                    f"Configuration for tool {tool_name} must be a dict or an "
+                    f"instance of {expected_type!r}, got {type(configuration)!r}"
+                )
+            config = configuration
+        # Return a copy so the dict passed in by the caller is left unchanged.
+        return {**value, "configuration": config}

unique_toolkit/tools/{tool_factory.py → factory.py} RENAMED Viewed

@@ -1,7 +1,11 @@
 from typing import Callable
-from unique_toolkit.unique_toolkit.tools.tool_definitions import BaseToolConfig, Tool
+from typing import TYPE_CHECKING
+from unique_toolkit.tools.schemas import BaseToolConfig
+from unique_toolkit.tools.tool import Tool
+if TYPE_CHECKING:
+    from unique_toolkit.tools.config import ToolBuildConfig
 class ToolFactory:
@@ -18,14 +22,20 @@ class ToolFactory:
         cls.tool_config_map[tool.name] = tool_config
     @classmethod
-    def build_tool(cls, tool_name: str, *args, **kwargs) -> Tool:
+    def build_tool(cls, tool_name: str, *args, **kwargs) -> Tool[BaseToolConfig]:
         tool = cls.tool_map[tool_name](*args, **kwargs)
         return tool
     @classmethod
-    def build_tool_config(
-        cls, tool_name: str, **kwargs
-    ) -> BaseToolConfig:
+    def build_tool_with_settings(
+        cls, tool_name: str, settings: "ToolBuildConfig", *args, **kwargs
+    ) -> Tool[BaseToolConfig]:
+        tool = cls.tool_map[tool_name](*args, **kwargs)
+        tool.settings = settings
+        return tool
+    @classmethod
+    def build_tool_config(cls, tool_name: str, **kwargs) -> BaseToolConfig:
         if tool_name not in cls.tool_config_map:
             raise ValueError(f"Tool {tool_name} not found")
         return cls.tool_config_map[tool_name](**kwargs)

unique_toolkit/tools/schemas.py ADDED Viewed

@@ -0,0 +1,138 @@
+import base64
+import gzip
+import re
+from typing import Any, Optional
+from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator
+from unique_toolkit.content.schemas import ContentChunk
+from unique_toolkit.tools.config import get_configuration_dict
+from unique_toolkit.tools.utils.source_handling.schema import SourceFormatConfig
+# TODO: this needs to be more general as the tools can potentially return anything maybe make a base class and then derive per "type" of tool
+# Result of one tool call: the call's id and the tool's name, plus optional
+# debug info, content chunks and reasoning result, and an error message.
+class ToolCallResponse(BaseModel):
+    id: str
+    name: str
+    debug_info: Optional[dict] = None  # TODO: Make the default {}
+    content_chunks: Optional[list[ContentChunk]] = None  # TODO: Make the default []
+    reasoning_result: Optional[dict] = None  # TODO: Make the default {}
+    # Empty string means no error was recorded (see `successful`).
+    error_message: str = ""
+    @property
+    def successful(self) -> bool:
+        """Return True exactly when `error_message` is the empty string."""
+        return self.error_message == ""
+# Base class for tool configurations. It declares no fields of its own and only
+# applies the shared model configuration from get_configuration_dict().
+class BaseToolConfig(BaseModel):
+    model_config = get_configuration_dict()
+    # TODO: add a check for the parameters to all be consistent within the tool config
+    pass
+class Source(BaseModel):
+    """Represents the sources in the tool call response that the llm will see
+    Args:
+        source_number: The number of the source
+        content: The content of the source
+    """
+    # Fields validate and serialize through their aliases, and can also be
+    # populated by their plain Python names.
+    model_config = ConfigDict(
+        validate_by_alias=True, serialize_by_alias=True, validate_by_name=True
+    )
+    # Every alias below is "[field_name] - <hint>". Per the class docstring the
+    # serialized form is what the llm sees, so the hint text is part of that output.
+    source_number: int | None = Field(
+        default=None,
+        serialization_alias="[source_number] - Used for citations!",
+        validation_alias="[source_number] - Used for citations!",
+    )
+    content: str = Field(
+        serialization_alias="[content] - Content of source",
+        validation_alias="[content] - Content of source",
+    )
+    order: int = Field(
+        serialization_alias="[order] - Index in the document!",
+        validation_alias="[order] - Index in the document!",
+    )
+    chunk_id: str | None = Field(
+        default=None,
+        serialization_alias="[chunk_id] - IGNORE",
+        validation_alias="[chunk_id] - IGNORE",
+    )
+    id: str = Field(
+        serialization_alias="[id] - IGNORE",
+        validation_alias="[id] - IGNORE",
+    )
+    key: str | None = Field(
+        default=None,
+        serialization_alias="[key] - IGNORE",
+        validation_alias="[key] - IGNORE",
+    )
+    # Either the parsed mapping or, when parsing finds nothing, the raw string
+    # (see _metadata_str_to_dict).
+    metadata: dict[str, str] | str | None = Field(
+        default=None,
+        serialization_alias="[metadata] - Formatted metadata",
+        validation_alias="[metadata] - Formatted metadata",
+    )
+    url: str | None = Field(
+        default=None,
+        serialization_alias="[url] - IGNORE",
+        validation_alias="[url] - IGNORE",
+    )
+    @field_validator("metadata", mode="before")
+    def _metadata_str_to_dict(
+        cls, v: str | dict[str, str] | None
+    ) -> dict[str, str] | None:
+        """
+        Normalize the incoming `metadata` value before field validation.
+
+        None and dict inputs are returned unchanged. Any other input is searched
+        with one pattern per entry of SourceFormatConfig().sections; for every
+        pattern that matches, the stripped first capture group is stored under
+        that section's key. If no pattern matches, the input is returned as is,
+        so the result can be a str despite the annotated return type.
+        """
+        if v is None or isinstance(v, dict):
+            return v
+        # v is the rendered string.  Build a dict by matching the
+        # patterns defined in SourceFormatConfig.sections.
+        cfg = SourceFormatConfig()  # or inject your app-wide config
+        out: dict[str, str] = {}
+        for key, tmpl in cfg.sections.items():
+            # NOTE(review): template_to_pattern is defined elsewhere; presumably it
+            # turns a section template into a regex with one capture group - confirm.
+            pattern = cfg.template_to_pattern(tmpl)
+            # re.S lets "." span newlines, so a captured value may be multi-line.
+            m = re.search(pattern, v, flags=re.S)
+            if m:
+                out[key] = m.group(1).strip()
+        # Fall back to the untouched input when nothing could be extracted.
+        return out if out else v  # type: ignore
+    # Compression + Base64 for url to hide it from the LLM
+    @field_serializer("url")
+    def serialize_url(self, value: str | None) -> str | None:
+        """Return `value` gzip-compressed and base64-encoded as text; None stays None."""
+        if value is None:
+            return None
+        # Compress then base64 encode
+        compressed = gzip.compress(value.encode())
+        return base64.b64encode(compressed).decode()
+    @field_validator("url", mode="before")
+    @classmethod
+    def validate_url(cls, value: Any) -> str | None:
+        """
+        Normalize the incoming `url` value before field validation.
+
+        None and the empty string become None. Any other string is first treated
+        as the output of serialize_url (base64, then gzip) and decoded; if that
+        fails for any reason the string is returned unchanged. Non-string values
+        are converted with str().
+        """
+        if value is None or isinstance(value, str) and not value:
+            return None
+        if isinstance(value, str):
+            try:
+                # Try to decode base64 then decompress
+                decoded_bytes = base64.b64decode(value.encode())
+                decompressed = gzip.decompress(decoded_bytes).decode()
+                return decompressed
+            except Exception:
+                # If decoding/decompression fails, assume it's plain text
+                return value
+        return str(value)
+# Prompt-related texts for one tool together with its input model (a dict).
+class ToolPrompts(BaseModel):
+    name: str
+    display_name: str
+    tool_description: str
+    # NOTE(review): this field was declared twice in a row. The second
+    # declaration only re-stated the first, so dropping it changes nothing;
+    # confirm whether a differently named field was intended instead.
+    tool_format_information_for_system_prompt: str
+    input_model: dict[str, Any]

unique_toolkit 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl

unique_toolkit 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl