unique_toolkit 1.34.1__py3-none-any.whl → 1.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -63,6 +63,10 @@ class LoopTokenReducer:
         self._content_service = ContentService.from_event(event)
         self._user_message = event.payload.user_message
         self._chat_id = event.payload.chat_id
+        self._effective_token_limit = int(
+            self._language_model.token_limits.token_limit_input
+            * (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE)
+        )
 
     def _get_encoder(self, language_model: LMI) -> tiktoken.Encoding:
         name = language_model.encoder_name or "cl100k_base"
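The reducer now precomputes the effective input limit once in `__init__` instead of deriving it at every check. A minimal sketch of the arithmetic, with assumed values for the model's input limit and the safety percentage (the real constant is defined elsewhere in the package):

```python
# Hypothetical values for illustration only.
MAX_INPUT_TOKENS_SAFETY_PERCENTAGE = 0.1  # assumed 10% safety margin
token_limit_input = 128_000               # assumed model input token limit

# Mirrors the computation added above.
effective_token_limit = int(token_limit_input * (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE))
print(effective_token_limit)  # 115200 -> reduction kicks in above this count
```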
@@ -95,7 +99,7 @@ class LoopTokenReducer:
 
         while self._exceeds_token_limit(token_count):
             token_count_before_reduction = token_count
-            loop_history = self._handle_token_limit_exceeded(loop_history)
+            loop_history = self._handle_token_limit_exceeded(loop_history, token_count)
             messages = self._construct_history(
                 history_from_db,
                 loop_history,
@@ -120,14 +124,10 @@ class LoopTokenReducer:
             len(chunks) > 1
             for chunks in self._reference_manager.get_chunks_of_all_tools()
         )
-        max_tokens = int(
-            self._language_model.token_limits.token_limit_input
-            * (1 - MAX_INPUT_TOKENS_SAFETY_PERCENTAGE)
-        )
         # TODO: This is not fully correct at the moment as the token_count
         # include system_prompt and user question already
         # TODO: There is a problem if we exceed but only have one chunk per tool call
-        exceeds_limit = token_count > max_tokens
+        exceeds_limit = token_count > self._effective_token_limit
 
         return has_multiple_chunks_for_a_tool_call and exceeds_limit
 
@@ -171,16 +171,21 @@ class LoopTokenReducer:
         return constructed_history
 
     def _handle_token_limit_exceeded(
-        self, loop_history: list[LanguageModelMessage]
+        self, loop_history: list[LanguageModelMessage], token_count: int
     ) -> list[LanguageModelMessage]:
         """Handle case where token limit is exceeded by reducing sources in tool responses."""
+        overshoot_factor = (
+            token_count / self._effective_token_limit
+            if self._effective_token_limit > 0
+            else 1.0
+        )
         self._logger.warning(
-            f"Length of messages is exceeds limit of {self._language_model.token_limits.token_limit_input} tokens. "
-            "Reducing number of sources per tool call.",
+            f"Length of messages exceeds limit of {self._effective_token_limit} tokens "
+            f"(overshoot factor: {overshoot_factor:.2f}x). Reducing number of sources per tool call.",
         )
 
         return self._reduce_message_length_by_reducing_sources_in_tool_response(
-            loop_history
+            loop_history, overshoot_factor
        )
 
     def _replace_user_message(
@@ -312,10 +317,18 @@ class LoopTokenReducer:
     def _reduce_message_length_by_reducing_sources_in_tool_response(
         self,
         history: list[LanguageModelMessage],
+        overshoot_factor: float,
     ) -> list[LanguageModelMessage]:
         """
-        Reduce the message length by removing the last source result of each tool call.
-        If there is only one source for a tool call, the tool call message is returned unchanged.
+        Reduce the message length by removing sources from each tool call based on overshoot.
+
+        The number of chunks to keep per tool call is calculated as:
+            chunks_to_keep = num_sources / (overshoot_factor * 0.75)
+
+        This ensures more aggressive reduction when we're significantly over the limit.
+        Using 0.75 factor provides a safety margin to avoid over-reduction.
+        E.g., if overshoot_factor = 2 (2x over limit), keep 1/1.5 = 2/3 of chunks.
+        Always keeps at least 1 chunk.
         """
         history_reduced: list[LanguageModelMessage] = []
         content_chunks_reduced: list[ContentChunk] = []
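A worked sketch of the formula in the docstring above, using assumed numbers:

```python
# Assumed: 24_000 tokens counted against a 12_000-token effective limit.
token_count = 24_000
effective_token_limit = 12_000

overshoot_factor = token_count / effective_token_limit if effective_token_limit > 0 else 1.0
# overshoot_factor == 2.0

num_sources = 9  # assumed chunks attached to one tool call
chunks_to_keep = max(1, int(num_sources / (overshoot_factor * 0.75)))
print(chunks_to_keep)  # 6 -> keeps 2/3 of the chunks, matching the docstring example
```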
@@ -328,6 +341,7 @@ class LoopTokenReducer:
                 message,  # type: ignore
                 chunk_offset,
                 source_offset,
+                overshoot_factor,
             )
             content_chunks_reduced.extend(result.reduced_chunks)
             history_reduced.append(result.message)
@@ -350,10 +364,15 @@ class LoopTokenReducer:
         message: LanguageModelToolMessage,
         chunk_offset: int,
         source_offset: int,
+        overshoot_factor: float,
     ) -> SourceReductionResult:
         """
-        Reduce the sources in the tool message by removing the last source.
-        If there is only one source, the message is returned unchanged.
+        Reduce the sources in the tool message based on overshoot factor.
+
+        Chunks to keep = num_sources / (overshoot_factor * 0.75)
+        This ensures fewer chunks are kept when overshoot is larger.
+        E.g., if overshoot_factor = 2 (2x over limit), keep 1/1.5 = 2/3 of chunks
+        Always keeps at least 1 chunk.
         """
         tool_chunks = self._reference_manager.get_chunks_of_tool(message.tool_call_id)
         num_sources = len(tool_chunks)
@@ -366,16 +385,27 @@ class LoopTokenReducer:
                 source_offset=source_offset,
             )
 
-        # Reduce chunks, keeping all but the last one if multiple exist
-        if num_sources == 1:
+        # Calculate how many chunks to keep based on overshoot
+        # Use 0.75 safety margin for aggressive reduction, but only when overshoot is
+        # significant enough (>= ~1.33). Otherwise, the margin would prevent reduction.
+        divisor = (
+            overshoot_factor * 0.75
+            if overshoot_factor * 0.75 >= 1.0
+            else overshoot_factor
+        )
+        chunks_to_keep = max(1, int(num_sources / divisor))
+
+        # Reduce chunks
+        if chunks_to_keep >= num_sources:
+            # No reduction needed for this tool call
             reduced_chunks = tool_chunks
             content_chunks_reduced = self._reference_manager.get_chunks()[
                 chunk_offset : chunk_offset + num_sources
             ]
         else:
-            reduced_chunks = tool_chunks[:-1]
+            reduced_chunks = tool_chunks[:chunks_to_keep]
             content_chunks_reduced = self._reference_manager.get_chunks()[
-                chunk_offset : chunk_offset + num_sources - 1
+                chunk_offset : chunk_offset + chunks_to_keep
             ]
         self._reference_manager.replace_chunks_of_tool(
             message.tool_call_id, reduced_chunks
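To see why the guard on the divisor matters, compare a small overshoot with and without the 0.75 margin (assumed numbers):

```python
# With overshoot_factor = 1.2, the 0.75 margin alone would *raise* the keep count
# above num_sources and thereby prevent any reduction:
overshoot_factor = 1.2
num_sources = 10

naive = max(1, int(num_sources / (overshoot_factor * 0.75)))   # 10 / 0.9 -> 11, no reduction
divisor = overshoot_factor * 0.75 if overshoot_factor * 0.75 >= 1.0 else overshoot_factor
guarded = max(1, int(num_sources / divisor))                   # 10 / 1.2 -> 8, reduces
print(naive, guarded)  # 11 8
```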
@@ -392,7 +422,7 @@ class LoopTokenReducer:
             message=new_message,
             reduced_chunks=content_chunks_reduced,
             chunk_offset=chunk_offset + num_sources,
-            source_offset=source_offset + num_sources - (1 if num_sources != 1 else 0),
+            source_offset=source_offset + len(reduced_chunks),
         )
 
     def _create_tool_call_message_with_reduced_sources(
@@ -0,0 +1,19 @@ unique_toolkit/agentic/loop_runner/__init__.py (new file)
+from unique_toolkit.agentic.loop_runner.base import LoopIterationRunner
+from unique_toolkit.agentic.loop_runner.middleware import (
+    PlanningConfig,
+    PlanningMiddleware,
+    PlanningSchemaConfig,
+)
+from unique_toolkit.agentic.loop_runner.runners import (
+    BasicLoopIterationRunner,
+    BasicLoopIterationRunnerConfig,
+)
+
+__all__ = [
+    "LoopIterationRunner",
+    "PlanningConfig",
+    "PlanningMiddleware",
+    "PlanningSchemaConfig",
+    "BasicLoopIterationRunnerConfig",
+    "BasicLoopIterationRunner",
+]
@@ -0,0 +1,57 @@ unique_toolkit/agentic/loop_runner/_stream_handler_utils.py (new file)
+from typing import Any, Required
+
+from openai.types.chat import ChatCompletionNamedToolChoiceParam
+from typing_extensions import TypedDict
+
+from unique_toolkit import LanguageModelToolDescription
+from unique_toolkit.agentic.loop_runner.base import _LoopIterationRunnerKwargs
+from unique_toolkit.chat.functions import LanguageModelStreamResponse
+from unique_toolkit.chat.service import LanguageModelMessages
+from unique_toolkit.content import ContentChunk
+
+
+class _StreamingHandlerKwargs(TypedDict, total=False):
+    messages: Required[LanguageModelMessages]
+    model_name: Required[str]
+    tools: list[LanguageModelToolDescription]
+    content_chunks: list[ContentChunk]
+    start_text: str
+    debug_info: dict[str, Any]
+    temperature: float
+    tool_choice: ChatCompletionNamedToolChoiceParam
+    other_options: dict[str, Any]
+
+
+def _extract_streaming_kwargs(
+    kwargs: _LoopIterationRunnerKwargs,
+) -> _StreamingHandlerKwargs:
+    res = _StreamingHandlerKwargs(
+        messages=kwargs["messages"],
+        model_name=kwargs["model"].name,
+    )
+
+    for k in [
+        "tools",
+        "content_chunks",
+        "start_text",
+        "debug_info",
+        "temperature",
+        "other_options",
+    ]:
+        if k in kwargs:
+            res[k] = kwargs[k]
+
+    return res
+
+
+async def stream_response(
+    loop_runner_kwargs: _LoopIterationRunnerKwargs,
+    **kwargs,
+) -> LanguageModelStreamResponse:
+    streaming_handler = loop_runner_kwargs["streaming_handler"]
+    streaming_handler_kwargs = _extract_streaming_kwargs(loop_runner_kwargs)
+    streaming_handler_kwargs.update(**kwargs)
+
+    return await streaming_handler.complete_with_references_async(
+        **streaming_handler_kwargs
+    )
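The helper copies only the keys the streaming handler understands, dropping runner-only keys such as `tool_choices`. The pattern in isolation, with hypothetical TypedDicts standing in for the real ones:

```python
from typing import Required, TypedDict

class _Wide(TypedDict, total=False):    # stands in for _LoopIterationRunnerKwargs
    messages: Required[list[str]]
    model_name: Required[str]
    temperature: float
    tool_choices: list[str]             # not understood by the narrow handler

class _Narrow(TypedDict, total=False):  # stands in for _StreamingHandlerKwargs
    messages: Required[list[str]]
    model_name: Required[str]
    temperature: float

def extract(kwargs: _Wide) -> _Narrow:
    res = _Narrow(messages=kwargs["messages"], model_name=kwargs["model_name"])
    for k in ("temperature",):          # copy optional keys only if present
        if k in kwargs:
            res[k] = kwargs[k]
    return res

print(extract({"messages": ["hi"], "model_name": "gpt", "tool_choices": ["t"]}))
# {'messages': ['hi'], 'model_name': 'gpt'}
```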
@@ -0,0 +1,38 @@ unique_toolkit/agentic/loop_runner/base.py (new file)
+from typing import Any, Protocol, Required, Unpack
+
+from openai.types.chat import ChatCompletionNamedToolChoiceParam
+from typing_extensions import TypedDict
+
+from unique_toolkit import LanguageModelToolDescription
+from unique_toolkit.chat.functions import LanguageModelStreamResponse
+from unique_toolkit.chat.service import LanguageModelMessages
+from unique_toolkit.content import ContentChunk
+from unique_toolkit.language_model.infos import LanguageModelInfo
+from unique_toolkit.protocols.support import (
+    SupportCompleteWithReferences,
+)
+
+
+class _LoopIterationRunnerKwargs(TypedDict, total=False):
+    iteration_index: Required[int]
+    streaming_handler: Required[SupportCompleteWithReferences]
+    messages: Required[LanguageModelMessages]
+    model: Required[LanguageModelInfo]
+    tools: list[LanguageModelToolDescription]
+    content_chunks: list[ContentChunk]
+    start_text: str
+    debug_info: dict[str, Any]
+    temperature: float
+    tool_choices: list[ChatCompletionNamedToolChoiceParam]
+    other_options: dict[str, Any]
+
+
+class LoopIterationRunner(Protocol):
+    """
+    A loop iteration runner is responsible for running a single iteration of the loop, and returning the stream response for that iteration.
+    """
+
+    async def __call__(
+        self,
+        **kwargs: Unpack[_LoopIterationRunnerKwargs],
+    ) -> LanguageModelStreamResponse: ...
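Any async callable matching this signature satisfies the protocol. A minimal sketch of a custom runner, reusing the import paths introduced in this diff; the class name and the pinned-temperature behavior are illustrative assumptions:

```python
from typing import Unpack

from unique_toolkit.agentic.loop_runner._stream_handler_utils import stream_response
from unique_toolkit.agentic.loop_runner.base import (
    LoopIterationRunner,
    _LoopIterationRunnerKwargs,
)
from unique_toolkit.chat.functions import LanguageModelStreamResponse


class PinnedTemperatureRunner(LoopIterationRunner):
    """Hypothetical runner: forces the same temperature on every iteration."""

    def __init__(self, temperature: float) -> None:
        self._temperature = temperature

    async def __call__(
        self, **kwargs: Unpack[_LoopIterationRunnerKwargs]
    ) -> LanguageModelStreamResponse:
        # Delegate to the shared streaming helper, overriding one kwarg.
        return await stream_response(loop_runner_kwargs=kwargs, temperature=self._temperature)
```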
@@ -0,0 +1,7 @@ unique_toolkit/agentic/loop_runner/middleware/__init__.py (new file)
+from unique_toolkit.agentic.loop_runner.middleware.planning import (
+    PlanningConfig,
+    PlanningMiddleware,
+    PlanningSchemaConfig,
+)
+
+__all__ = ["PlanningConfig", "PlanningMiddleware", "PlanningSchemaConfig"]
@@ -0,0 +1,9 @@ unique_toolkit/agentic/loop_runner/middleware/planning/__init__.py (new file)
+from unique_toolkit.agentic.loop_runner.middleware.planning.planning import (
+    PlanningConfig,
+    PlanningMiddleware,
+)
+from unique_toolkit.agentic.loop_runner.middleware.planning.schema import (
+    PlanningSchemaConfig,
+)
+
+__all__ = ["PlanningMiddleware", "PlanningConfig", "PlanningSchemaConfig"]
@@ -0,0 +1,85 @@ unique_toolkit/agentic/loop_runner/middleware/planning/planning.py (new file)
+import json
+import logging
+from typing import Unpack
+
+from pydantic import BaseModel
+
+from unique_toolkit import LanguageModelService
+from unique_toolkit._common.pydantic_helpers import get_configuration_dict
+from unique_toolkit.agentic.history_manager.history_manager import HistoryManager
+from unique_toolkit.agentic.loop_runner.base import (
+    LoopIterationRunner,
+    _LoopIterationRunnerKwargs,
+)
+from unique_toolkit.agentic.loop_runner.middleware.planning.schema import (
+    PlanningSchemaConfig,
+    get_planning_schema,
+)
+from unique_toolkit.agentic.tools.utils import failsafe_async
+from unique_toolkit.chat.service import LanguageModelStreamResponse
+from unique_toolkit.language_model import (
+    LanguageModelAssistantMessage,
+)
+
+_LOGGER = logging.getLogger(__name__)
+
+
+class PlanningConfig(BaseModel):
+    model_config = get_configuration_dict()
+
+    planning_schema_config: PlanningSchemaConfig = PlanningSchemaConfig()
+
+
+class PlanningMiddleware(LoopIterationRunner):
+    def __init__(
+        self,
+        *,
+        loop_runner: LoopIterationRunner,
+        config: PlanningConfig,
+        llm_service: LanguageModelService,
+        history_manager: HistoryManager | None = None,
+    ) -> None:
+        self._config = config
+        self._loop_runner = loop_runner
+        self._history_manager = history_manager
+        self._llm_service = llm_service
+
+    @failsafe_async(failure_return_value=None, logger=_LOGGER)
+    async def _run_plan_step(
+        self, **kwargs: Unpack[_LoopIterationRunnerKwargs]
+    ) -> LanguageModelAssistantMessage | None:
+        planning_schema = get_planning_schema(self._config.planning_schema_config)
+
+        response = await self._llm_service.complete_async(
+            messages=kwargs["messages"],
+            model_name=kwargs["model"].name,
+            structured_output_model=planning_schema,
+            other_options=kwargs.get("other_options", {}),
+        )
+
+        if response.choices[0].message.parsed is None:
+            _LOGGER.info("Error parsing planning response")
+            return None
+
+        return LanguageModelAssistantMessage(
+            content=json.dumps(response.choices[0].message.parsed)
+        )
+
+    async def __call__(
+        self, **kwargs: Unpack[_LoopIterationRunnerKwargs]
+    ) -> LanguageModelStreamResponse:
+        assistant_message = await self._run_plan_step(**kwargs)
+
+        if assistant_message is None:
+            _LOGGER.info(
+                "Error executing planning step, proceeding without planning step"
+            )
+            return await self._loop_runner(**kwargs)
+
+        if self._history_manager is not None:
+            self._history_manager.add_assistant_message(assistant_message)
+
+        kwargs["messages"] = (
+            kwargs["messages"].builder().append(assistant_message).build()
+        )
+        return await self._loop_runner(**kwargs)
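The middleware wraps any other runner: it runs a structured planning completion first, appends the plan as an assistant message, then delegates. A wiring sketch using only names exported in this diff; the `llm_service` argument is assumed to be an already-constructed `LanguageModelService`:

```python
from unique_toolkit.agentic.loop_runner import (
    BasicLoopIterationRunner,
    BasicLoopIterationRunnerConfig,
    PlanningConfig,
    PlanningMiddleware,
)

def build_planning_runner(llm_service):
    # Inner runner that performs the actual iteration.
    inner = BasicLoopIterationRunner(
        BasicLoopIterationRunnerConfig(max_loop_iterations=5)
    )
    # Wrap it so every iteration is preceded by a planning step.
    return PlanningMiddleware(
        loop_runner=inner,
        config=PlanningConfig(),  # default planning schema
        llm_service=llm_service,  # used for the structured planning completion
    )
```

Note the failure path: `_run_plan_step` is wrapped in `failsafe_async`, so a failed or unparsable planning call degrades gracefully to the inner runner instead of aborting the loop.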
@@ -0,0 +1,82 @@ unique_toolkit/agentic/loop_runner/middleware/planning/schema.py (new file)
+import json
+from typing import Annotated, Any
+
+from pydantic import BaseModel, Field, RootModel, create_model
+
+from unique_toolkit._common.pydantic_helpers import get_configuration_dict
+
+_PLANNING_SCHEMA_DESCRIPTION = """
+Think about the next step to take.
+
+Instructions:
+- Consider the user input and the context of the conversation.
+- Consider any previous tool calls, their results and the instructions related to the available tool calls.
+- Consider any failed tool calls.
+Goals:
+- Output a plan for the next step. It MUST be justified, meaning that you MUST explain why you choose to take this step.
+- You MUST recover from any failed tool calls.
+- You MUST explain what tool calls to call next and why.
+- If ready to answer the user, justify why you have gathered enough information / tried all possible ways and failed.
+- If ready to answer the user, REMEMBER and mention any previous instructions you have in the history. This is a CRUCIAL step.
+
+IMPORTANT:
+- Tools will be available after the planning step.
+""".strip()
+
+_DEFAULT_PLANNING_PARAM_DESCRIPTION = """
+Next step description:
+- Decide what to do next.
+- Justify it THOROUGHLY.
+""".strip()
+
+
+class DefaultPlanningSchemaConfig(BaseModel):
+    """
+    Configuration for the default planning schema, which is a simple JSON object with a single field: `plan`.
+    """
+
+    model_config = get_configuration_dict()
+
+    description: str = Field(
+        default=_PLANNING_SCHEMA_DESCRIPTION,
+        description="Description of the planning schema. This will correspond to the description of the model in the json schema.",
+    )
+    plan_param_description: str = Field(
+        default=_DEFAULT_PLANNING_PARAM_DESCRIPTION,
+        description="The description of the `plan` parameter.",
+    )
+
+
+class PlanningSchemaConfig(RootModel[DefaultPlanningSchemaConfig | str]):
+    model_config = get_configuration_dict()
+
+    root: (
+        Annotated[
+            DefaultPlanningSchemaConfig,
+            Field(
+                description="Configuration for the default planning schema, which is a simple json dict with a single `plan` field.",
+                title="Default Planning Schema",
+            ),
+        ]
+        | Annotated[
+            str,
+            Field(
+                description="Custom JSON Schema as string for the planning schema.",
+                title="Custom Planning Schema",
+            ),
+        ]
+    ) = Field(default=DefaultPlanningSchemaConfig())
+
+
+def get_planning_schema(config: PlanningSchemaConfig) -> dict[str, Any]:
+    if isinstance(config.root, DefaultPlanningSchemaConfig):
+        return create_model(
+            "Planning",
+            plan=(
+                str,
+                Field(description=config.root.plan_param_description),
+            ),
+            __doc__=config.root.description,
+        ).model_json_schema()
+
+    return json.loads(config.root)
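A sketch of what the two branches produce, assuming the package is installed as published here:

```python
from unique_toolkit.agentic.loop_runner.middleware.planning.schema import (
    PlanningSchemaConfig,
    get_planning_schema,
)

# Default branch: a dynamically created pydantic model serialized to JSON schema.
schema = get_planning_schema(PlanningSchemaConfig())
print(sorted(schema["properties"]))  # ['plan'] -- a single string field
print(schema["required"])            # ['plan']

# Custom branch: a JSON Schema supplied as a string is parsed and passed through.
custom = PlanningSchemaConfig(root='{"type": "object", "properties": {}}')
print(get_planning_schema(custom))   # {'type': 'object', 'properties': {}}
```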
@@ -0,0 +1,6 @@ unique_toolkit/agentic/loop_runner/runners/__init__.py (new file)
+from unique_toolkit.agentic.loop_runner.runners.basic import (
+    BasicLoopIterationRunner,
+    BasicLoopIterationRunnerConfig,
+)
+
+__all__ = ["BasicLoopIterationRunnerConfig", "BasicLoopIterationRunner"]
@@ -0,0 +1,94 @@ unique_toolkit/agentic/loop_runner/runners/basic.py (new file)
+import logging
+from typing import Unpack, override
+
+from pydantic import BaseModel
+
+from unique_toolkit._common.pydantic_helpers import get_configuration_dict
+from unique_toolkit.agentic.loop_runner._stream_handler_utils import stream_response
+from unique_toolkit.agentic.loop_runner.base import (
+    LoopIterationRunner,
+    _LoopIterationRunnerKwargs,
+)
+from unique_toolkit.chat.functions import LanguageModelStreamResponse
+from unique_toolkit.protocols.support import (
+    ResponsesLanguageModelStreamResponse,
+)
+
+_LOGGER = logging.getLogger(__name__)
+
+
+class BasicLoopIterationRunnerConfig(BaseModel):
+    model_config = get_configuration_dict()
+    max_loop_iterations: int
+
+
+class BasicLoopIterationRunner(LoopIterationRunner):
+    def __init__(self, config: BasicLoopIterationRunnerConfig) -> None:
+        self._config = config
+
+    async def _handle_last_iteration(
+        self, **kwargs: Unpack[_LoopIterationRunnerKwargs]
+    ) -> LanguageModelStreamResponse:
+        _LOGGER.info(
+            "Reached last iteration, removing tools and producing final response"
+        )
+
+        return await stream_response(
+            loop_runner_kwargs=kwargs,
+            tools=None,
+        )
+
+    async def _handle_normal_iteration(
+        self, **kwargs: Unpack[_LoopIterationRunnerKwargs]
+    ) -> LanguageModelStreamResponse:
+        _LOGGER.info("Running loop iteration %d", kwargs["iteration_index"])
+
+        return await stream_response(loop_runner_kwargs=kwargs)
+
+    async def _handle_forced_tools_iteration(
+        self,
+        **kwargs: Unpack[_LoopIterationRunnerKwargs],
+    ) -> LanguageModelStreamResponse:
+        assert "tool_choices" in kwargs
+
+        tool_choices = kwargs["tool_choices"]
+        _LOGGER.info("Forcing tool calls: %s", tool_choices)
+
+        responses: list[LanguageModelStreamResponse] = []
+
+        for opt in tool_choices:
+            responses.append(
+                await stream_response(
+                    loop_runner_kwargs=kwargs,
+                    tool_choice=opt,
+                )
+            )
+
+        # Merge responses and refs:
+        tool_calls = []
+        references = []
+        for r in responses:
+            if r.tool_calls:
+                tool_calls.extend(r.tool_calls)
+            references.extend(r.message.references)
+
+        response = responses[0]
+        response.tool_calls = tool_calls if len(tool_calls) > 0 else None
+        response.message.references = references
+
+        return response
+
+    @override
+    async def __call__(
+        self,
+        **kwargs: Unpack[_LoopIterationRunnerKwargs],
+    ) -> LanguageModelStreamResponse | ResponsesLanguageModelStreamResponse:
+        tool_choices = kwargs.get("tool_choices", [])
+        iteration_index = kwargs["iteration_index"]
+
+        if len(tool_choices) > 0 and iteration_index == 0:
+            return await self._handle_forced_tools_iteration(**kwargs)
+        elif iteration_index == self._config.max_loop_iterations - 1:
+            return await self._handle_last_iteration(**kwargs)
+        else:
+            return await self._handle_normal_iteration(**kwargs)
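The dispatch in `__call__` follows a fixed order: forced tool calls win on the first iteration, the final iteration strips tools to guarantee a textual answer, and everything else is a normal iteration. The branch logic isolated as a tiny pure-Python sketch with assumed values:

```python
def pick_branch(iteration_index: int, max_loop_iterations: int, n_tool_choices: int) -> str:
    # Mirrors the branch order of BasicLoopIterationRunner.__call__.
    if n_tool_choices > 0 and iteration_index == 0:
        return "forced-tools"
    if iteration_index == max_loop_iterations - 1:
        return "final (tools removed)"
    return "normal"

print(pick_branch(0, 3, 2))  # forced-tools
print(pick_branch(1, 3, 0))  # normal
print(pick_branch(2, 3, 0))  # final (tools removed)
```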
@@ -46,7 +46,7 @@ def complete(
     timeout: int = DEFAULT_COMPLETE_TIMEOUT,
     tools: list[LanguageModelTool | LanguageModelToolDescription] | None = None,
     other_options: dict | None = None,
-    structured_output_model: type[BaseModel] | None = None,
+    structured_output_model: type[BaseModel] | dict[str, Any] | None = None,
     structured_output_enforce_schema: bool = False,
     user_id: str | None = None,
 ) -> LanguageModelResponse:
@@ -110,7 +110,7 @@ async def complete_async(
     timeout: int = DEFAULT_COMPLETE_TIMEOUT,
     tools: list[LanguageModelTool | LanguageModelToolDescription] | None = None,
     other_options: dict | None = None,
-    structured_output_model: type[BaseModel] | None = None,
+    structured_output_model: type[BaseModel] | dict[str, Any] | None = None,
     structured_output_enforce_schema: bool = False,
 ) -> LanguageModelResponse:
     """Call the completion endpoint asynchronously without streaming the response.
@@ -214,9 +214,21 @@ def _to_search_context(
 
 def _add_response_format_to_options(
     options: dict,
-    structured_output_model: type[BaseModel],
+    structured_output_model: type[BaseModel] | dict[str, Any],
     structured_output_enforce_schema: bool = False,
 ) -> dict:
+    if isinstance(structured_output_model, dict):
+        name = structured_output_model.get("title", "DefaultName")
+        options["responseFormat"] = {
+            "type": "json_schema",
+            "json_schema": {
+                "name": name,
+                "strict": structured_output_enforce_schema,
+                "schema": structured_output_model,
+            },
+        }
+        return options
+
     options["responseFormat"] = {
         "type": "json_schema",
         "json_schema": {
@@ -235,7 +247,7 @@ def _prepare_completion_params_util(
     tools: Sequence[LanguageModelTool | LanguageModelToolDescription] | None = None,
     other_options: dict | None = None,
     content_chunks: list[ContentChunk] | None = None,
-    structured_output_model: type[BaseModel] | None = None,
+    structured_output_model: type[BaseModel] | dict[str, Any] | None = None,
     structured_output_enforce_schema: bool = False,
 ) -> tuple[dict, str, dict, SearchContext | None]:
     """Prepare common parameters for completion requests.
@@ -283,7 +295,7 @@ def _prepare_openai_completion_params_util(
     tools: Sequence[LanguageModelTool | LanguageModelToolDescription] | None = None,
     other_options: dict | None = None,
     content_chunks: list[ContentChunk] | None = None,
-    structured_output_model: type[BaseModel] | None = None,
+    structured_output_model: type[BaseModel] | dict[str, Any] | None = None,
     structured_output_enforce_schema: bool = False,
 ) -> tuple[dict, str, SearchContext | None]:
     """Prepare common parameters for completion requests.
@@ -355,7 +367,7 @@ def _prepare_all_completions_params_util(
     other_options: dict | None = None,
     content_chunks: list[ContentChunk] | None = None,
     tool_choice: ChatCompletionToolChoiceOptionParam | None = None,
-    structured_output_model: type[BaseModel] | None = None,
+    structured_output_model: type[BaseModel] | dict[str, Any] | None = None,
     structured_output_enforce_schema: bool = False,
 ) -> tuple[
     dict,
@@ -226,7 +226,7 @@ class LanguageModelService:
         temperature: float = DEFAULT_COMPLETE_TEMPERATURE,
         timeout: int = DEFAULT_COMPLETE_TIMEOUT,
         tools: Optional[list[LanguageModelTool | LanguageModelToolDescription]] = None,
-        structured_output_model: Optional[Type[BaseModel]] = None,
+        structured_output_model: Optional[Type[BaseModel] | dict[str, Any]] = None,
         structured_output_enforce_schema: bool = False,
         other_options: Optional[dict] = None,
     ) -> LanguageModelResponse:
@@ -254,7 +254,7 @@ class LanguageModelService:
         temperature: float = DEFAULT_COMPLETE_TEMPERATURE,
         timeout: int = DEFAULT_COMPLETE_TIMEOUT,
         tools: Optional[list[LanguageModelTool | LanguageModelToolDescription]] = None,
-        structured_output_model: Optional[Type[BaseModel]] = None,
+        structured_output_model: Optional[Type[BaseModel] | dict[str, Any]] = None,
         structured_output_enforce_schema: bool = False,
         other_options: Optional[dict] = None,
     ) -> LanguageModelResponse:
@@ -286,7 +286,7 @@ class LanguageModelService:
         temperature: float = DEFAULT_COMPLETE_TEMPERATURE,
         timeout: int = DEFAULT_COMPLETE_TIMEOUT,
         tools: Optional[list[LanguageModelTool | LanguageModelToolDescription]] = None,
-        structured_output_model: Optional[Type[BaseModel]] = None,
+        structured_output_model: Optional[Type[BaseModel] | dict[str, Any]] = None,
        structured_output_enforce_schema: bool = False,
         other_options: Optional[dict] = None,
     ) -> LanguageModelResponse:
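With these signature changes, a raw JSON-schema dict can be passed anywhere a `BaseModel` subclass was previously required. A call sketch under stated assumptions: `service` is a constructed `LanguageModelService`, `messages` a `LanguageModelMessages` instance, and the model name is illustrative only:

```python
async def ask_structured(service, messages):
    schema = {
        "title": "Answer",  # becomes the response-format name
        "type": "object",
        "properties": {"text": {"type": "string"}},
        "required": ["text"],
    }
    # A dict is now accepted alongside type[BaseModel].
    return await service.complete_async(
        messages=messages,
        model_name="AZURE_GPT_4o_2024_0806",  # hypothetical model name
        structured_output_model=schema,
        structured_output_enforce_schema=True,
    )
```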
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unique_toolkit
-Version: 1.34.1
+Version: 1.35.1
 Summary:
 License: Proprietary
 Author: Cedric Klinkert
@@ -121,6 +121,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.35.1] - 2025-12-05
+- Improve efficiency of token reducer if tool calls overshoot max token limit
+
+## [1.35.0] - 2025-12-04
+- Add `LoopIterationRunner` abstraction and support for planning before every loop iteration.
+
 ## [1.34.1] - 2025-12-02
 - Update code interpreter tool instructions.
 
@@ -60,8 +60,17 @@ unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py,sha256
 unique_toolkit/agentic/evaluation/tests/test_output_parser.py,sha256=RN_HcBbU6qy_e_PoYyUFcjWnp3ymJ6-gLj6TgEOupAI,3107
 unique_toolkit/agentic/history_manager/history_construction_with_contents.py,sha256=kzxpVzTtQqL8TjdIvOy7gkRVxD4BsOMyimECryg7vdc,9060
 unique_toolkit/agentic/history_manager/history_manager.py,sha256=7V7_173XkAjc8otBACF0G3dbqRs34FSlURbBPrE95Wk,9537
-unique_toolkit/agentic/history_manager/loop_token_reducer.py,sha256=4XUX2-yVBnaYthV8p0zj2scVBUdK_3IhxBgoNlrytyQ,18498
+unique_toolkit/agentic/history_manager/loop_token_reducer.py,sha256=PUNR2aTFBUQjD1y3gJC9YlseBUYjbQ68qfig9a65e7w,19824
 unique_toolkit/agentic/history_manager/utils.py,sha256=VIn_UmcR3jHtpux0qp5lQQzczgAm8XYSeQiPo87jC3A,3143
+unique_toolkit/agentic/loop_runner/__init__.py,sha256=QLCYmIyfcKQEbuv1Xm0VuR_xC8JyD2_aMIvt1TRFzvw,517
+unique_toolkit/agentic/loop_runner/_stream_handler_utils.py,sha256=FTGc5y8wkDnwnRVSYEdandgKz-FiySOsrTFFMadwP6E,1706
+unique_toolkit/agentic/loop_runner/base.py,sha256=3g4PalzV00o8kcRwHds2c2rtxW4idD7_7vS2Z7GkMvQ,1370
+unique_toolkit/agentic/loop_runner/middleware/__init__.py,sha256=_yeRH8xYigfJZyQ5-5lZUo2RXDJkGfftCQrKFm2rWb4,217
+unique_toolkit/agentic/loop_runner/middleware/planning/__init__.py,sha256=Y9MlihNA8suNREixW98RF45bj0EMtD_tQuDrO2MEML4,304
+unique_toolkit/agentic/loop_runner/middleware/planning/planning.py,sha256=QiXqTGxQ-9Puc79blTBadD0piU7SY5INyndCdanfqA8,2877
+unique_toolkit/agentic/loop_runner/middleware/planning/schema.py,sha256=76C36CWCLfDAYYqtaQlhXsmkWM1fCqf8j-l5afQREKA,2869
+unique_toolkit/agentic/loop_runner/runners/__init__.py,sha256=raaNpHcTfXkYURy0ysyacispSdQzYPDoG17PyR57uK4,205
+unique_toolkit/agentic/loop_runner/runners/basic.py,sha256=3swSPsefV1X-ltUC8iNAOrn9PL0abUUfWXJjhM4sShA,3116
 unique_toolkit/agentic/message_log_manager/__init__.py,sha256=3-KY_sGkPbNoSnrzwPY0FQIJNnsz4NHXvocXgGRUeuE,169
 unique_toolkit/agentic/message_log_manager/service.py,sha256=AiuIq2dKQg9Y8bEYgGcve1X8-WRRdqPZXaZXXLJxfFM,3057
 unique_toolkit/agentic/postprocessor/postprocessor_manager.py,sha256=s6HFhA61TE05aAay15NFTWI1JvdSlxmGpEVfpBbGFyM,7684
@@ -180,12 +189,12 @@ unique_toolkit/language_model/_responses_api_utils.py,sha256=LmxMIuKFDm70PPGZIcb
 unique_toolkit/language_model/builder.py,sha256=4OKfwJfj3TrgO1ezc_ewIue6W7BCQ2ZYQXUckWVPPTA,3369
 unique_toolkit/language_model/constants.py,sha256=B-topqW0r83dkC_25DeQfnPk3n53qzIHUCBS7YJ0-1U,119
 unique_toolkit/language_model/default_language_model.py,sha256=-_DBsJhLCsFdaU4ynAkyW0jYIl2lhrPybZm1K-GgVJs,125
-unique_toolkit/language_model/functions.py,sha256=nGxlV4OO70jdH_7AgRWDMpbzmmKLZ-5Tk4gu5nxB2ko,17735
+unique_toolkit/language_model/functions.py,sha256=PTBm2BBkuqISVHoyUqMIGHGXT-RMSAfz0F_Ylo2esQ8,18246
 unique_toolkit/language_model/infos.py,sha256=sZJOOij-dfReDxJWfd7ZwP3qx4KcN1LVqNchRafKmrY,79877
 unique_toolkit/language_model/prompt.py,sha256=JSawaLjQg3VR-E2fK8engFyJnNdk21zaO8pPIodzN4Q,3991
 unique_toolkit/language_model/reference.py,sha256=nkX2VFz-IrUz8yqyc3G5jUMNwrNpxITBrMEKkbqqYoI,8583
 unique_toolkit/language_model/schemas.py,sha256=ATiHjhfGxoubS332XuhL9PKSoFewcWvPTUVBaNGWlJo,23994
-unique_toolkit/language_model/service.py,sha256=rt5LoQnDifIa5TnykGkJNl03lj0QhCMdXSdNA-bXn1c,11904
+unique_toolkit/language_model/service.py,sha256=fI2S5JLawJRRkKg086Ysz2Of4AOBHPN-4tdsUtagdjs,11955
 unique_toolkit/language_model/utils.py,sha256=bPQ4l6_YO71w-zaIPanUUmtbXC1_hCvLK0tAFc3VCRc,1902
 unique_toolkit/protocols/support.py,sha256=ZEnbQL5w2-T_1AeM8OHycZJ3qbdfVI1nXe0nL9esQEw,5544
 unique_toolkit/services/__init__.py,sha256=90-IT5FjMcnlqxjp5kme9Fqgp_on46rggctIqHMdqsw,195
@@ -199,7 +208,7 @@ unique_toolkit/short_term_memory/service.py,sha256=5PeVBu1ZCAfyDb2HLVvlmqSbyzBBu
 unique_toolkit/smart_rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unique_toolkit/smart_rules/compile.py,sha256=Ozhh70qCn2yOzRWr9d8WmJeTo7AQurwd3tStgBMPFLA,1246
 unique_toolkit/test_utilities/events.py,sha256=_mwV2bs5iLjxS1ynDCjaIq-gjjKhXYCK-iy3dRfvO3g,6410
-unique_toolkit-1.34.1.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
-unique_toolkit-1.34.1.dist-info/METADATA,sha256=Ebl8qfOh2p3WyhNkml3We-HIKFemQ6goX9hDjoC8jEo,45594
-unique_toolkit-1.34.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-unique_toolkit-1.34.1.dist-info/RECORD,,
+unique_toolkit-1.35.1.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
+unique_toolkit-1.35.1.dist-info/METADATA,sha256=yqoVsVNxeGXbw7h5u9I7bLt_jcB1FF-yonYPGGgUS0o,45818
+unique_toolkit-1.35.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+unique_toolkit-1.35.1.dist-info/RECORD,,