unique_toolkit-1.35.0-py3-none-any.whl → unique_toolkit-1.35.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/unique_toolkit/agentic/history_manager/loop_token_reducer.py
+++ b/unique_toolkit/agentic/history_manager/loop_token_reducer.py
@@ -63,6 +63,10 @@ class LoopTokenReducer:
         self._content_service = ContentService.from_event(event)
         self._user_message = event.payload.user_message
         self._chat_id = event.payload.chat_id
+        self._effective_token_limit = int(
+            self._language_model.token_limits.token_limit_input
+            * (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE)
+        )
 
     def _get_encoder(self, language_model: LMI) -> tiktoken.Encoding:
         name = language_model.encoder_name or "cl100k_base"
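The added `_effective_token_limit` field precomputes the budget the reducer enforces. A minimal sketch of the arithmetic, assuming an illustrative 128,000-token input limit and a hypothetical safety percentage of 0.05 (the real value of `MAX_INPUT_TOKENS_SAFETY_PERCENTAGE` is defined elsewhere in the package and not shown in this diff):

```python
# Illustrative values only; neither number is taken from the package.
token_limit_input = 128_000                 # assumed model input limit
MAX_INPUT_TOKENS_SAFETY_PERCENTAGE = 0.05   # assumed safety percentage

effective_token_limit = int(token_limit_input * (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE))
print(effective_token_limit)  # 121600 -- the reducer compares against this, not the raw limit
```

Computing this once in `__init__` also lets the limit check and the warning message share a single value, which is why the duplicated `max_tokens` computation is deleted below.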
@@ -95,7 +99,7 @@ class LoopTokenReducer:
 
         while self._exceeds_token_limit(token_count):
             token_count_before_reduction = token_count
-            loop_history = self._handle_token_limit_exceeded(loop_history)
+            loop_history = self._handle_token_limit_exceeded(loop_history, token_count)
             messages = self._construct_history(
                 history_from_db,
                 loop_history,
@@ -120,14 +124,10 @@ class LoopTokenReducer:
             len(chunks) > 1
             for chunks in self._reference_manager.get_chunks_of_all_tools()
         )
-        max_tokens = int(
-            self._language_model.token_limits.token_limit_input
-            * (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE)
-        )
         # TODO: This is not fully correct at the moment as the token_count
         # include system_prompt and user question already
         # TODO: There is a problem if we exceed but only have one chunk per tool call
-        exceeds_limit = token_count > max_tokens
+        exceeds_limit = token_count > self._effective_token_limit
 
         return has_multiple_chunks_for_a_tool_call and exceeds_limit
 
@@ -171,16 +171,21 @@ class LoopTokenReducer:
         return constructed_history
 
     def _handle_token_limit_exceeded(
-        self, loop_history: list[LanguageModelMessage]
+        self, loop_history: list[LanguageModelMessage], token_count: int
     ) -> list[LanguageModelMessage]:
         """Handle case where token limit is exceeded by reducing sources in tool responses."""
+        overshoot_factor = (
+            token_count / self._effective_token_limit
+            if self._effective_token_limit > 0
+            else 1.0
+        )
         self._logger.warning(
-            f"Length of messages is exceeds limit of {self._language_model.token_limits.token_limit_input} tokens. "
-            "Reducing number of sources per tool call.",
+            f"Length of messages exceeds limit of {self._effective_token_limit} tokens "
+            f"(overshoot factor: {overshoot_factor:.2f}x). Reducing number of sources per tool call.",
        )
 
         return self._reduce_message_length_by_reducing_sources_in_tool_response(
-            loop_history
+            loop_history, overshoot_factor
         )
 
     def _replace_user_message(
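The new `token_count` parameter feeds an overshoot factor: how many times over the effective limit the current prompt is, guarded against a non-positive limit. A standalone sketch of that guarded division (the function wrapper is ours, for illustration only):

```python
def overshoot(token_count: int, effective_token_limit: int) -> float:
    # Mirrors the guarded expression added above: fall back to 1.0
    # ("no measured overshoot") instead of dividing by zero.
    if effective_token_limit > 0:
        return token_count / effective_token_limit
    return 1.0

print(overshoot(180_000, 120_000))  # 1.5 -> prompt is 1.5x over budget
print(overshoot(50_000, 0))         # 1.0 -> degenerate limit handled safely
```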
@@ -312,10 +317,18 @@ class LoopTokenReducer:
     def _reduce_message_length_by_reducing_sources_in_tool_response(
         self,
         history: list[LanguageModelMessage],
+        overshoot_factor: float,
     ) -> list[LanguageModelMessage]:
         """
-        Reduce the message length by removing the last source result of each tool call.
-        If there is only one source for a tool call, the tool call message is returned unchanged.
+        Reduce the message length by removing sources from each tool call based on overshoot.
+
+        The number of chunks to keep per tool call is calculated as:
+            chunks_to_keep = num_sources / (overshoot_factor * 0.75)
+
+        This ensures more aggressive reduction when we're significantly over the limit.
+        Using 0.75 factor provides a safety margin to avoid over-reduction.
+        E.g., if overshoot_factor = 2 (2x over limit), keep 1/1.5 = 2/3 of chunks.
+        Always keeps at least 1 chunk.
         """
         history_reduced: list[LanguageModelMessage] = []
         content_chunks_reduced: list[ContentChunk] = []
@@ -328,6 +341,7 @@ class LoopTokenReducer:
                 message,  # type: ignore
                 chunk_offset,
                 source_offset,
+                overshoot_factor,
             )
             content_chunks_reduced.extend(result.reduced_chunks)
             history_reduced.append(result.message)
@@ -350,10 +364,15 @@ class LoopTokenReducer:
         message: LanguageModelToolMessage,
         chunk_offset: int,
         source_offset: int,
+        overshoot_factor: float,
     ) -> SourceReductionResult:
         """
-        Reduce the sources in the tool message by removing the last source.
-        If there is only one source, the message is returned unchanged.
+        Reduce the sources in the tool message based on overshoot factor.
+
+        Chunks to keep = num_sources / (overshoot_factor * 0.75)
+        This ensures fewer chunks are kept when overshoot is larger.
+        E.g., if overshoot_factor = 2 (2x over limit), keep 1/1.5 = 2/3 of chunks
+        Always keeps at least 1 chunk.
         """
         tool_chunks = self._reference_manager.get_chunks_of_tool(message.tool_call_id)
         num_sources = len(tool_chunks)
@@ -366,16 +385,27 @@
                 source_offset=source_offset,
             )
 
-        # Reduce chunks, keeping all but the last one if multiple exist
-        if num_sources == 1:
+        # Calculate how many chunks to keep based on overshoot
+        # Use 0.75 safety margin for aggressive reduction, but only when overshoot is
+        # significant enough (>= ~1.33). Otherwise, the margin would prevent reduction.
+        divisor = (
+            overshoot_factor * 0.75
+            if overshoot_factor * 0.75 >= 1.0
+            else overshoot_factor
+        )
+        chunks_to_keep = max(1, int(num_sources / divisor))
+
+        # Reduce chunks
+        if chunks_to_keep >= num_sources:
+            # No reduction needed for this tool call
             reduced_chunks = tool_chunks
             content_chunks_reduced = self._reference_manager.get_chunks()[
                 chunk_offset : chunk_offset + num_sources
             ]
         else:
-            reduced_chunks = tool_chunks[:-1]
+            reduced_chunks = tool_chunks[:chunks_to_keep]
             content_chunks_reduced = self._reference_manager.get_chunks()[
-                chunk_offset : chunk_offset + num_sources - 1
+                chunk_offset : chunk_offset + chunks_to_keep
             ]
         self._reference_manager.replace_chunks_of_tool(
             message.tool_call_id, reduced_chunks
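The divisor clamp above is the subtle part of this change: for overshoot factors below roughly 1.33, `overshoot_factor * 0.75` falls under 1.0, and dividing by it would keep more chunks than exist, so no reduction would ever happen. A self-contained sketch of the keep-count arithmetic with worked values (the helper name is ours, for illustration only):

```python
def chunks_to_keep(num_sources: int, overshoot_factor: float) -> int:
    # Apply the 0.75 safety margin only while it still yields a divisor >= 1.0;
    # otherwise fall back to the raw overshoot factor so reduction always progresses.
    scaled = overshoot_factor * 0.75
    divisor = scaled if scaled >= 1.0 else overshoot_factor
    return max(1, int(num_sources / divisor))

print(chunks_to_keep(9, 2.0))   # divisor 1.5 -> keep 6 of 9 (the 2/3 from the docstring)
print(chunks_to_keep(9, 1.2))   # 0.9 < 1.0, so divide by 1.2 -> keep 7 of 9
print(chunks_to_keep(4, 10.0))  # heavy overshoot -> keep max(1, int(4 / 7.5)) = 1 chunk
```

Compared with 1.35.0, which dropped exactly one chunk per tool call per iteration, this scales the cut to the measured overshoot, so heavily oversized prompts converge in far fewer reduction passes.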
@@ -392,7 +422,7 @@
             message=new_message,
             reduced_chunks=content_chunks_reduced,
             chunk_offset=chunk_offset + num_sources,
-            source_offset=source_offset + num_sources - (1 if num_sources != 1 else 0),
+            source_offset=source_offset + len(reduced_chunks),
         )
 
     def _create_tool_call_message_with_reduced_sources(
--- a/unique_toolkit-1.35.0.dist-info/METADATA
+++ b/unique_toolkit-1.35.1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unique_toolkit
-Version: 1.35.0
+Version: 1.35.1
 Summary:
 License: Proprietary
 Author: Cedric Klinkert
@@ -121,6 +121,9 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.35.1] - 2025-12-05
+- Improve efficiency of token reducer if tool calls overshoot max token limit
+
 ## [1.35.0] - 2025-12-04
 - Add `LoopIterationRunner` abstraction and support for planning before every loop iteration.
 
--- a/unique_toolkit-1.35.0.dist-info/RECORD
+++ b/unique_toolkit-1.35.1.dist-info/RECORD
@@ -60,7 +60,7 @@ unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py,sha256
 unique_toolkit/agentic/evaluation/tests/test_output_parser.py,sha256=RN_HcBbU6qy_e_PoYyUFcjWnp3ymJ6-gLj6TgEOupAI,3107
 unique_toolkit/agentic/history_manager/history_construction_with_contents.py,sha256=kzxpVzTtQqL8TjdIvOy7gkRVxD4BsOMyimECryg7vdc,9060
 unique_toolkit/agentic/history_manager/history_manager.py,sha256=7V7_173XkAjc8otBACF0G3dbqRs34FSlURbBPrE95Wk,9537
-unique_toolkit/agentic/history_manager/loop_token_reducer.py,sha256=4XUX2-yVBnaYthV8p0zj2scVBUdK_3IhxBgoNlrytyQ,18498
+unique_toolkit/agentic/history_manager/loop_token_reducer.py,sha256=PUNR2aTFBUQjD1y3gJC9YlseBUYjbQ68qfig9a65e7w,19824
 unique_toolkit/agentic/history_manager/utils.py,sha256=VIn_UmcR3jHtpux0qp5lQQzczgAm8XYSeQiPo87jC3A,3143
 unique_toolkit/agentic/loop_runner/__init__.py,sha256=QLCYmIyfcKQEbuv1Xm0VuR_xC8JyD2_aMIvt1TRFzvw,517
 unique_toolkit/agentic/loop_runner/_stream_handler_utils.py,sha256=FTGc5y8wkDnwnRVSYEdandgKz-FiySOsrTFFMadwP6E,1706
@@ -208,7 +208,7 @@ unique_toolkit/short_term_memory/service.py,sha256=5PeVBu1ZCAfyDb2HLVvlmqSbyzBBu
 unique_toolkit/smart_rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unique_toolkit/smart_rules/compile.py,sha256=Ozhh70qCn2yOzRWr9d8WmJeTo7AQurwd3tStgBMPFLA,1246
 unique_toolkit/test_utilities/events.py,sha256=_mwV2bs5iLjxS1ynDCjaIq-gjjKhXYCK-iy3dRfvO3g,6410
-unique_toolkit-1.35.0.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
-unique_toolkit-1.35.0.dist-info/METADATA,sha256=9VeZPMOuD-dYgXiIG2dVpTHxYsqJKXM5sADlIGF1pIc,45714
-unique_toolkit-1.35.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-unique_toolkit-1.35.0.dist-info/RECORD,,
+unique_toolkit-1.35.1.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
+unique_toolkit-1.35.1.dist-info/METADATA,sha256=yqoVsVNxeGXbw7h5u9I7bLt_jcB1FF-yonYPGGgUS0o,45818
+unique_toolkit-1.35.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+unique_toolkit-1.35.1.dist-info/RECORD,,