unique_toolkit-1.35.0-py3-none-any.whl → unique_toolkit-1.35.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/unique_toolkit/agentic/history_manager/loop_token_reducer.py
+++ b/unique_toolkit/agentic/history_manager/loop_token_reducer.py
@@ -63,6 +63,10 @@ class LoopTokenReducer:
         self._content_service = ContentService.from_event(event)
         self._user_message = event.payload.user_message
         self._chat_id = event.payload.chat_id
+        self._effective_token_limit = int(
+            self._language_model.token_limits.token_limit_input
+            * (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE)
+        )
 
     def _get_encoder(self, language_model: LMI) -> tiktoken.Encoding:
         name = language_model.encoder_name or "cl100k_base"
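The added `_effective_token_limit` field precomputes the budget the reducer enforces. A minimal sketch of the arithmetic, assuming an illustrative 128,000-token input limit and a hypothetical safety percentage of 0.05 (the real value of `MAX_INPUT_TOKENS_SAFETY_PERCENTAGE` is defined elsewhere in the package and not shown in this diff):

```python
# Illustrative values only; neither number is taken from the package.
token_limit_input = 128_000                 # assumed model input limit
MAX_INPUT_TOKENS_SAFETY_PERCENTAGE = 0.05   # assumed safety percentage

effective_token_limit = int(token_limit_input * (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE))
print(effective_token_limit)  # 121600 -- the reducer compares against this, not the raw limit
```

Computing this once in `__init__` also lets the limit check and the warning message share a single value, which is why the duplicated `max_tokens` computation is deleted below.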
@@ -95,7 +99,7 @@ class LoopTokenReducer:
 
         while self._exceeds_token_limit(token_count):
             token_count_before_reduction = token_count
-            loop_history = self._handle_token_limit_exceeded(loop_history)
+            loop_history = self._handle_token_limit_exceeded(loop_history, token_count)
             messages = self._construct_history(
                 history_from_db,
                 loop_history,
@@ -120,14 +124,10 @@ class LoopTokenReducer:
             len(chunks) > 1
             for chunks in self._reference_manager.get_chunks_of_all_tools()
         )
-        max_tokens = int(
-            self._language_model.token_limits.token_limit_input
-            * (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE)
-        )
         # TODO: This is not fully correct at the moment as the token_count
         # include system_prompt and user question already
         # TODO: There is a problem if we exceed but only have one chunk per tool call
-        exceeds_limit = token_count > max_tokens
+        exceeds_limit = token_count > self._effective_token_limit
 
         return has_multiple_chunks_for_a_tool_call and exceeds_limit
 
@@ -171,16 +171,21 @@ class LoopTokenReducer:
         return constructed_history
 
     def _handle_token_limit_exceeded(
-        self, loop_history: list[LanguageModelMessage]
+        self, loop_history: list[LanguageModelMessage], token_count: int
     ) -> list[LanguageModelMessage]:
         """Handle case where token limit is exceeded by reducing sources in tool responses."""
+        overshoot_factor = (
+            token_count / self._effective_token_limit
+            if self._effective_token_limit > 0
+            else 1.0
+        )
         self._logger.warning(
-            f"Length of messages is exceeds limit of {self._language_model.token_limits.token_limit_input} tokens. "
-            "Reducing number of sources per tool call.",
+            f"Length of messages exceeds limit of {self._effective_token_limit} tokens "
+            f"(overshoot factor: {overshoot_factor:.2f}x). Reducing number of sources per tool call.",
        )
 
         return self._reduce_message_length_by_reducing_sources_in_tool_response(
-            loop_history
+            loop_history, overshoot_factor
         )
 
     def _replace_user_message(
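The new `token_count` parameter feeds an overshoot factor: how many times over the effective limit the current prompt is, guarded against a non-positive limit. A standalone sketch of that guarded division (the function wrapper is ours, for illustration only):

```python
def overshoot(token_count: int, effective_token_limit: int) -> float:
    # Mirrors the guarded expression added above: fall back to 1.0
    # ("no measured overshoot") instead of dividing by zero.
    if effective_token_limit > 0:
        return token_count / effective_token_limit
    return 1.0

print(overshoot(180_000, 120_000))  # 1.5 -> prompt is 1.5x over budget
print(overshoot(50_000, 0))         # 1.0 -> degenerate limit handled safely
```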
@@ -312,10 +317,18 @@ class LoopTokenReducer:
     def _reduce_message_length_by_reducing_sources_in_tool_response(
         self,
         history: list[LanguageModelMessage],
+        overshoot_factor: float,
     ) -> list[LanguageModelMessage]:
         """
-        Reduce the message length by removing the last source result of each tool call.
-        If there is only one source for a tool call, the tool call message is returned unchanged.
+        Reduce the message length by removing sources from each tool call based on overshoot.
+
+        The number of chunks to keep per tool call is calculated as:
+            chunks_to_keep = num_sources / (overshoot_factor * 0.75)
+
+        This ensures more aggressive reduction when we're significantly over the limit.
+        Using 0.75 factor provides a safety margin to avoid over-reduction.
+        E.g., if overshoot_factor = 2 (2x over limit), keep 1/1.5 = 2/3 of chunks.
+        Always keeps at least 1 chunk.
         """
         history_reduced: list[LanguageModelMessage] = []
         content_chunks_reduced: list[ContentChunk] = []
@@ -328,6 +341,7 @@ class LoopTokenReducer:
                 message,  # type: ignore
                 chunk_offset,
                 source_offset,
+                overshoot_factor,
             )
             content_chunks_reduced.extend(result.reduced_chunks)
             history_reduced.append(result.message)
@@ -350,10 +364,15 @@ class LoopTokenReducer:
         message: LanguageModelToolMessage,
         chunk_offset: int,
         source_offset: int,
+        overshoot_factor: float,
     ) -> SourceReductionResult:
         """
-        Reduce the sources in the tool message by removing the last source.
-        If there is only one source, the message is returned unchanged.
+        Reduce the sources in the tool message based on overshoot factor.
+
+        Chunks to keep = num_sources / (overshoot_factor * 0.75)
+        This ensures fewer chunks are kept when overshoot is larger.
+        E.g., if overshoot_factor = 2 (2x over limit), keep 1/1.5 = 2/3 of chunks
+        Always keeps at least 1 chunk.
         """
         tool_chunks = self._reference_manager.get_chunks_of_tool(message.tool_call_id)
         num_sources = len(tool_chunks)
@@ -366,16 +385,27 @@
                 source_offset=source_offset,
             )
 
-        # Reduce chunks, keeping all but the last one if multiple exist
-        if num_sources == 1:
+        # Calculate how many chunks to keep based on overshoot
+        # Use 0.75 safety margin for aggressive reduction, but only when overshoot is
+        # significant enough (>= ~1.33). Otherwise, the margin would prevent reduction.
+        divisor = (
+            overshoot_factor * 0.75
+            if overshoot_factor * 0.75 >= 1.0
+            else overshoot_factor
+        )
+        chunks_to_keep = max(1, int(num_sources / divisor))
+
+        # Reduce chunks
+        if chunks_to_keep >= num_sources:
+            # No reduction needed for this tool call
             reduced_chunks = tool_chunks
             content_chunks_reduced = self._reference_manager.get_chunks()[
                 chunk_offset : chunk_offset + num_sources
             ]
         else:
-            reduced_chunks = tool_chunks[:-1]
+            reduced_chunks = tool_chunks[:chunks_to_keep]
             content_chunks_reduced = self._reference_manager.get_chunks()[
-                chunk_offset : chunk_offset + num_sources - 1
+                chunk_offset : chunk_offset + chunks_to_keep
             ]
         self._reference_manager.replace_chunks_of_tool(
             message.tool_call_id, reduced_chunks
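The divisor clamp above is the subtle part of this change: for overshoot factors below roughly 1.33, `overshoot_factor * 0.75` falls under 1.0, and dividing by it would keep more chunks than exist, so no reduction would ever happen. A self-contained sketch of the keep-count arithmetic with worked values (the helper name is ours, for illustration only):

```python
def chunks_to_keep(num_sources: int, overshoot_factor: float) -> int:
    # Apply the 0.75 safety margin only while it still yields a divisor >= 1.0;
    # otherwise fall back to the raw overshoot factor so reduction always progresses.
    scaled = overshoot_factor * 0.75
    divisor = scaled if scaled >= 1.0 else overshoot_factor
    return max(1, int(num_sources / divisor))

print(chunks_to_keep(9, 2.0))   # divisor 1.5 -> keep 6 of 9 (the 2/3 from the docstring)
print(chunks_to_keep(9, 1.2))   # 0.9 < 1.0, so divide by 1.2 -> keep 7 of 9
print(chunks_to_keep(4, 10.0))  # heavy overshoot -> keep max(1, int(4 / 7.5)) = 1 chunk
```

Compared with 1.35.0, which dropped exactly one chunk per tool call per iteration, this scales the cut to the measured overshoot, so heavily oversized prompts converge in far fewer reduction passes.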
@@ -392,7 +422,7 @@
             message=new_message,
             reduced_chunks=content_chunks_reduced,
             chunk_offset=chunk_offset + num_sources,
-            source_offset=source_offset + num_sources - (1 if num_sources != 1 else 0),
+            source_offset=source_offset + len(reduced_chunks),
         )
 
     def _create_tool_call_message_with_reduced_sources(
--- a/unique_toolkit-1.35.0.dist-info/METADATA
+++ b/unique_toolkit-1.35.1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unique_toolkit
-Version: 1.35.0
+Version: 1.35.1
 Summary:
 License: Proprietary
 Author: Cedric Klinkert
@@ -121,6 +121,9 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.35.1] - 2025-12-05
+- Improve efficiency of token reducer if tool calls overshoot max token limit
+
 ## [1.35.0] - 2025-12-04
 - Add `LoopIterationRunner` abstraction and support for planning before every loop iteration.
 
--- a/unique_toolkit-1.35.0.dist-info/RECORD
+++ b/unique_toolkit-1.35.1.dist-info/RECORD
@@ -60,7 +60,7 @@ unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py,sha256
 unique_toolkit/agentic/evaluation/tests/test_output_parser.py,sha256=RN_HcBbU6qy_e_PoYyUFcjWnp3ymJ6-gLj6TgEOupAI,3107
 unique_toolkit/agentic/history_manager/history_construction_with_contents.py,sha256=kzxpVzTtQqL8TjdIvOy7gkRVxD4BsOMyimECryg7vdc,9060
 unique_toolkit/agentic/history_manager/history_manager.py,sha256=7V7_173XkAjc8otBACF0G3dbqRs34FSlURbBPrE95Wk,9537
-unique_toolkit/agentic/history_manager/loop_token_reducer.py,sha256=4XUX2-yVBnaYthV8p0zj2scVBUdK_3IhxBgoNlrytyQ,18498
+unique_toolkit/agentic/history_manager/loop_token_reducer.py,sha256=PUNR2aTFBUQjD1y3gJC9YlseBUYjbQ68qfig9a65e7w,19824
 unique_toolkit/agentic/history_manager/utils.py,sha256=VIn_UmcR3jHtpux0qp5lQQzczgAm8XYSeQiPo87jC3A,3143
 unique_toolkit/agentic/loop_runner/__init__.py,sha256=QLCYmIyfcKQEbuv1Xm0VuR_xC8JyD2_aMIvt1TRFzvw,517
 unique_toolkit/agentic/loop_runner/_stream_handler_utils.py,sha256=FTGc5y8wkDnwnRVSYEdandgKz-FiySOsrTFFMadwP6E,1706
@@ -208,7 +208,7 @@ unique_toolkit/short_term_memory/service.py,sha256=5PeVBu1ZCAfyDb2HLVvlmqSbyzBBu
 unique_toolkit/smart_rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unique_toolkit/smart_rules/compile.py,sha256=Ozhh70qCn2yOzRWr9d8WmJeTo7AQurwd3tStgBMPFLA,1246
 unique_toolkit/test_utilities/events.py,sha256=_mwV2bs5iLjxS1ynDCjaIq-gjjKhXYCK-iy3dRfvO3g,6410
-unique_toolkit-1.35.0.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
-unique_toolkit-1.35.0.dist-info/METADATA,sha256=9VeZPMOuD-dYgXiIG2dVpTHxYsqJKXM5sADlIGF1pIc,45714
-unique_toolkit-1.35.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-unique_toolkit-1.35.0.dist-info/RECORD,,
+unique_toolkit-1.35.1.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
+unique_toolkit-1.35.1.dist-info/METADATA,sha256=yqoVsVNxeGXbw7h5u9I7bLt_jcB1FF-yonYPGGgUS0o,45818
+unique_toolkit-1.35.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+unique_toolkit-1.35.1.dist-info/RECORD,,