unique_toolkit 1.35.0__py3-none-any.whl → 1.35.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_toolkit/agentic/history_manager/loop_token_reducer.py +49 -19
- {unique_toolkit-1.35.0.dist-info → unique_toolkit-1.35.1.dist-info}/METADATA +4 -1
- {unique_toolkit-1.35.0.dist-info → unique_toolkit-1.35.1.dist-info}/RECORD +5 -5
- {unique_toolkit-1.35.0.dist-info → unique_toolkit-1.35.1.dist-info}/LICENSE +0 -0
- {unique_toolkit-1.35.0.dist-info → unique_toolkit-1.35.1.dist-info}/WHEEL +0 -0
|
@@ -63,6 +63,10 @@ class LoopTokenReducer:
|
|
|
63
63
|
self._content_service = ContentService.from_event(event)
|
|
64
64
|
self._user_message = event.payload.user_message
|
|
65
65
|
self._chat_id = event.payload.chat_id
|
|
66
|
+
self._effective_token_limit = int(
|
|
67
|
+
self._language_model.token_limits.token_limit_input
|
|
68
|
+
* (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE)
|
|
69
|
+
)
|
|
66
70
|
|
|
67
71
|
def _get_encoder(self, language_model: LMI) -> tiktoken.Encoding:
|
|
68
72
|
name = language_model.encoder_name or "cl100k_base"
|
|
@@ -95,7 +99,7 @@ class LoopTokenReducer:
|
|
|
95
99
|
|
|
96
100
|
while self._exceeds_token_limit(token_count):
|
|
97
101
|
token_count_before_reduction = token_count
|
|
98
|
-
loop_history = self._handle_token_limit_exceeded(loop_history)
|
|
102
|
+
loop_history = self._handle_token_limit_exceeded(loop_history, token_count)
|
|
99
103
|
messages = self._construct_history(
|
|
100
104
|
history_from_db,
|
|
101
105
|
loop_history,
|
|
@@ -120,14 +124,10 @@ class LoopTokenReducer:
|
|
|
120
124
|
len(chunks) > 1
|
|
121
125
|
for chunks in self._reference_manager.get_chunks_of_all_tools()
|
|
122
126
|
)
|
|
123
|
-
max_tokens = int(
|
|
124
|
-
self._language_model.token_limits.token_limit_input
|
|
125
|
-
* (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE)
|
|
126
|
-
)
|
|
127
127
|
# TODO: This is not fully correct at the moment as the token_count
|
|
128
128
|
# already includes the system_prompt and the user question
|
|
129
129
|
# TODO: There is a problem if we exceed but only have one chunk per tool call
|
|
130
|
-
exceeds_limit = token_count >
|
|
130
|
+
exceeds_limit = token_count > self._effective_token_limit
|
|
131
131
|
|
|
132
132
|
return has_multiple_chunks_for_a_tool_call and exceeds_limit
|
|
133
133
|
|
|
@@ -171,16 +171,21 @@ class LoopTokenReducer:
|
|
|
171
171
|
return constructed_history
|
|
172
172
|
|
|
173
173
|
def _handle_token_limit_exceeded(
|
|
174
|
-
self, loop_history: list[LanguageModelMessage]
|
|
174
|
+
self, loop_history: list[LanguageModelMessage], token_count: int
|
|
175
175
|
) -> list[LanguageModelMessage]:
|
|
176
176
|
"""Handle case where token limit is exceeded by reducing sources in tool responses."""
|
|
177
|
+
overshoot_factor = (
|
|
178
|
+
token_count / self._effective_token_limit
|
|
179
|
+
if self._effective_token_limit > 0
|
|
180
|
+
else 1.0
|
|
181
|
+
)
|
|
177
182
|
self._logger.warning(
|
|
178
|
-
f"Length of messages
|
|
179
|
-
"Reducing number of sources per tool call.",
|
|
183
|
+
f"Length of messages exceeds limit of {self._effective_token_limit} tokens "
|
|
184
|
+
f"(overshoot factor: {overshoot_factor:.2f}x). Reducing number of sources per tool call.",
|
|
180
185
|
)
|
|
181
186
|
|
|
182
187
|
return self._reduce_message_length_by_reducing_sources_in_tool_response(
|
|
183
|
-
loop_history
|
|
188
|
+
loop_history, overshoot_factor
|
|
184
189
|
)
|
|
185
190
|
|
|
186
191
|
def _replace_user_message(
|
|
@@ -312,10 +317,18 @@ class LoopTokenReducer:
|
|
|
312
317
|
def _reduce_message_length_by_reducing_sources_in_tool_response(
|
|
313
318
|
self,
|
|
314
319
|
history: list[LanguageModelMessage],
|
|
320
|
+
overshoot_factor: float,
|
|
315
321
|
) -> list[LanguageModelMessage]:
|
|
316
322
|
"""
|
|
317
|
-
Reduce the message length by removing
|
|
318
|
-
|
|
323
|
+
Reduce the message length by removing sources from each tool call based on overshoot.
|
|
324
|
+
|
|
325
|
+
The number of chunks to keep per tool call is calculated as:
|
|
326
|
+
chunks_to_keep = num_sources / (overshoot_factor * 0.75)
|
|
327
|
+
|
|
328
|
+
This ensures more aggressive reduction when we're significantly over the limit.
|
|
329
|
+
Using 0.75 factor provides a safety margin to avoid over-reduction.
|
|
330
|
+
E.g., if overshoot_factor = 2 (2x over limit), keep 1/1.5 = 2/3 of chunks.
|
|
331
|
+
Always keeps at least 1 chunk.
|
|
319
332
|
"""
|
|
320
333
|
history_reduced: list[LanguageModelMessage] = []
|
|
321
334
|
content_chunks_reduced: list[ContentChunk] = []
|
|
@@ -328,6 +341,7 @@ class LoopTokenReducer:
|
|
|
328
341
|
message, # type: ignore
|
|
329
342
|
chunk_offset,
|
|
330
343
|
source_offset,
|
|
344
|
+
overshoot_factor,
|
|
331
345
|
)
|
|
332
346
|
content_chunks_reduced.extend(result.reduced_chunks)
|
|
333
347
|
history_reduced.append(result.message)
|
|
@@ -350,10 +364,15 @@ class LoopTokenReducer:
|
|
|
350
364
|
message: LanguageModelToolMessage,
|
|
351
365
|
chunk_offset: int,
|
|
352
366
|
source_offset: int,
|
|
367
|
+
overshoot_factor: float,
|
|
353
368
|
) -> SourceReductionResult:
|
|
354
369
|
"""
|
|
355
|
-
Reduce the sources in the tool message
|
|
356
|
-
|
|
370
|
+
Reduce the sources in the tool message based on overshoot factor.
|
|
371
|
+
|
|
372
|
+
Chunks to keep = num_sources / (overshoot_factor * 0.75)
|
|
373
|
+
This ensures fewer chunks are kept when overshoot is larger.
|
|
374
|
+
E.g., if overshoot_factor = 2 (2x over limit), keep 1/1.5 = 2/3 of chunks
|
|
375
|
+
Always keeps at least 1 chunk.
|
|
357
376
|
"""
|
|
358
377
|
tool_chunks = self._reference_manager.get_chunks_of_tool(message.tool_call_id)
|
|
359
378
|
num_sources = len(tool_chunks)
|
|
@@ -366,16 +385,27 @@ class LoopTokenReducer:
|
|
|
366
385
|
source_offset=source_offset,
|
|
367
386
|
)
|
|
368
387
|
|
|
369
|
-
#
|
|
370
|
-
|
|
388
|
+
# Calculate how many chunks to keep based on overshoot
|
|
389
|
+
# Apply the 0.75 safety margin (which avoids over-reduction), but only when overshoot is
|
|
390
|
+
# significant enough (>= ~1.33). Otherwise, the margin would prevent reduction.
|
|
391
|
+
divisor = (
|
|
392
|
+
overshoot_factor * 0.75
|
|
393
|
+
if overshoot_factor * 0.75 >= 1.0
|
|
394
|
+
else overshoot_factor
|
|
395
|
+
)
|
|
396
|
+
chunks_to_keep = max(1, int(num_sources / divisor))
|
|
397
|
+
|
|
398
|
+
# Reduce chunks
|
|
399
|
+
if chunks_to_keep >= num_sources:
|
|
400
|
+
# No reduction needed for this tool call
|
|
371
401
|
reduced_chunks = tool_chunks
|
|
372
402
|
content_chunks_reduced = self._reference_manager.get_chunks()[
|
|
373
403
|
chunk_offset : chunk_offset + num_sources
|
|
374
404
|
]
|
|
375
405
|
else:
|
|
376
|
-
reduced_chunks = tool_chunks[
|
|
406
|
+
reduced_chunks = tool_chunks[:chunks_to_keep]
|
|
377
407
|
content_chunks_reduced = self._reference_manager.get_chunks()[
|
|
378
|
-
chunk_offset : chunk_offset +
|
|
408
|
+
chunk_offset : chunk_offset + chunks_to_keep
|
|
379
409
|
]
|
|
380
410
|
self._reference_manager.replace_chunks_of_tool(
|
|
381
411
|
message.tool_call_id, reduced_chunks
|
|
@@ -392,7 +422,7 @@ class LoopTokenReducer:
|
|
|
392
422
|
message=new_message,
|
|
393
423
|
reduced_chunks=content_chunks_reduced,
|
|
394
424
|
chunk_offset=chunk_offset + num_sources,
|
|
395
|
-
source_offset=source_offset +
|
|
425
|
+
source_offset=source_offset + len(reduced_chunks),
|
|
396
426
|
)
|
|
397
427
|
|
|
398
428
|
def _create_tool_call_message_with_reduced_sources(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unique_toolkit
|
|
3
|
-
Version: 1.35.0
|
|
3
|
+
Version: 1.35.1
|
|
4
4
|
Summary:
|
|
5
5
|
License: Proprietary
|
|
6
6
|
Author: Cedric Klinkert
|
|
@@ -121,6 +121,9 @@ All notable changes to this project will be documented in this file.
|
|
|
121
121
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
122
122
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
123
123
|
|
|
124
|
+
## [1.35.1] - 2025-12-05
|
|
125
|
+
- Improve efficiency of token reducer if tool calls overshoot max token limit
|
|
126
|
+
|
|
124
127
|
## [1.35.0] - 2025-12-04
|
|
125
128
|
- Add `LoopIterationRunner` abstraction and support for planning before every loop iteration.
|
|
126
129
|
|
|
@@ -60,7 +60,7 @@ unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py,sha256
|
|
|
60
60
|
unique_toolkit/agentic/evaluation/tests/test_output_parser.py,sha256=RN_HcBbU6qy_e_PoYyUFcjWnp3ymJ6-gLj6TgEOupAI,3107
|
|
61
61
|
unique_toolkit/agentic/history_manager/history_construction_with_contents.py,sha256=kzxpVzTtQqL8TjdIvOy7gkRVxD4BsOMyimECryg7vdc,9060
|
|
62
62
|
unique_toolkit/agentic/history_manager/history_manager.py,sha256=7V7_173XkAjc8otBACF0G3dbqRs34FSlURbBPrE95Wk,9537
|
|
63
|
-
unique_toolkit/agentic/history_manager/loop_token_reducer.py,sha256=
|
|
63
|
+
unique_toolkit/agentic/history_manager/loop_token_reducer.py,sha256=PUNR2aTFBUQjD1y3gJC9YlseBUYjbQ68qfig9a65e7w,19824
|
|
64
64
|
unique_toolkit/agentic/history_manager/utils.py,sha256=VIn_UmcR3jHtpux0qp5lQQzczgAm8XYSeQiPo87jC3A,3143
|
|
65
65
|
unique_toolkit/agentic/loop_runner/__init__.py,sha256=QLCYmIyfcKQEbuv1Xm0VuR_xC8JyD2_aMIvt1TRFzvw,517
|
|
66
66
|
unique_toolkit/agentic/loop_runner/_stream_handler_utils.py,sha256=FTGc5y8wkDnwnRVSYEdandgKz-FiySOsrTFFMadwP6E,1706
|
|
@@ -208,7 +208,7 @@ unique_toolkit/short_term_memory/service.py,sha256=5PeVBu1ZCAfyDb2HLVvlmqSbyzBBu
|
|
|
208
208
|
unique_toolkit/smart_rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
209
209
|
unique_toolkit/smart_rules/compile.py,sha256=Ozhh70qCn2yOzRWr9d8WmJeTo7AQurwd3tStgBMPFLA,1246
|
|
210
210
|
unique_toolkit/test_utilities/events.py,sha256=_mwV2bs5iLjxS1ynDCjaIq-gjjKhXYCK-iy3dRfvO3g,6410
|
|
211
|
-
unique_toolkit-1.35.0.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
|
|
212
|
-
unique_toolkit-1.35.
|
|
213
|
-
unique_toolkit-1.35.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
214
|
-
unique_toolkit-1.35.0.dist-info/RECORD,,
|
|
211
|
+
unique_toolkit-1.35.1.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
|
|
212
|
+
unique_toolkit-1.35.1.dist-info/METADATA,sha256=yqoVsVNxeGXbw7h5u9I7bLt_jcB1FF-yonYPGGgUS0o,45818
|
|
213
|
+
unique_toolkit-1.35.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
214
|
+
unique_toolkit-1.35.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|