symbolicai-0.21.0-py3-none-any.whl → symbolicai-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +269 -173
- symai/backend/base.py +123 -110
- symai/backend/engines/drawing/engine_bfl.py +45 -44
- symai/backend/engines/drawing/engine_gpt_image.py +112 -97
- symai/backend/engines/embedding/engine_llama_cpp.py +63 -52
- symai/backend/engines/embedding/engine_openai.py +25 -21
- symai/backend/engines/execute/engine_python.py +19 -18
- symai/backend/engines/files/engine_io.py +104 -95
- symai/backend/engines/imagecaptioning/engine_blip2.py +28 -24
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +102 -79
- symai/backend/engines/index/engine_pinecone.py +124 -97
- symai/backend/engines/index/engine_qdrant.py +1011 -0
- symai/backend/engines/index/engine_vectordb.py +84 -56
- symai/backend/engines/lean/engine_lean4.py +96 -52
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +330 -248
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +329 -264
- symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +118 -88
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +344 -299
- symai/backend/engines/neurosymbolic/engine_groq.py +173 -115
- symai/backend/engines/neurosymbolic/engine_huggingface.py +114 -84
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +144 -118
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +415 -307
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +394 -231
- symai/backend/engines/ocr/engine_apilayer.py +23 -27
- symai/backend/engines/output/engine_stdout.py +10 -13
- symai/backend/engines/{webscraping → scrape}/engine_requests.py +101 -54
- symai/backend/engines/search/engine_openai.py +100 -88
- symai/backend/engines/search/engine_parallel.py +665 -0
- symai/backend/engines/search/engine_perplexity.py +44 -45
- symai/backend/engines/search/engine_serpapi.py +37 -34
- symai/backend/engines/speech_to_text/engine_local_whisper.py +54 -51
- symai/backend/engines/symbolic/engine_wolframalpha.py +15 -9
- symai/backend/engines/text_to_speech/engine_openai.py +20 -26
- symai/backend/engines/text_vision/engine_clip.py +39 -37
- symai/backend/engines/userinput/engine_console.py +5 -6
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +48 -38
- symai/backend/mixin/deepseek.py +6 -5
- symai/backend/mixin/google.py +7 -4
- symai/backend/mixin/groq.py +2 -4
- symai/backend/mixin/openai.py +140 -110
- symai/backend/settings.py +87 -20
- symai/chat.py +216 -123
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +80 -70
- symai/collect/pipeline.py +67 -51
- symai/collect/stats.py +161 -109
- symai/components.py +707 -360
- symai/constraints.py +24 -12
- symai/core.py +1857 -1233
- symai/core_ext.py +83 -80
- symai/endpoints/api.py +166 -104
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +29 -21
- symai/extended/arxiv_pdf_parser.py +23 -14
- symai/extended/bibtex_parser.py +9 -6
- symai/extended/conversation.py +156 -126
- symai/extended/document.py +50 -30
- symai/extended/file_merger.py +57 -14
- symai/extended/graph.py +51 -32
- symai/extended/html_style_template.py +18 -14
- symai/extended/interfaces/blip_2.py +2 -3
- symai/extended/interfaces/clip.py +4 -3
- symai/extended/interfaces/console.py +9 -1
- symai/extended/interfaces/dall_e.py +4 -2
- symai/extended/interfaces/file.py +2 -0
- symai/extended/interfaces/flux.py +4 -2
- symai/extended/interfaces/gpt_image.py +16 -7
- symai/extended/interfaces/input.py +2 -1
- symai/extended/interfaces/llava.py +1 -2
- symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +4 -3
- symai/extended/interfaces/naive_vectordb.py +9 -10
- symai/extended/interfaces/ocr.py +5 -3
- symai/extended/interfaces/openai_search.py +2 -0
- symai/extended/interfaces/parallel.py +30 -0
- symai/extended/interfaces/perplexity.py +2 -0
- symai/extended/interfaces/pinecone.py +12 -9
- symai/extended/interfaces/python.py +2 -0
- symai/extended/interfaces/serpapi.py +3 -1
- symai/extended/interfaces/terminal.py +2 -4
- symai/extended/interfaces/tts.py +3 -2
- symai/extended/interfaces/whisper.py +3 -2
- symai/extended/interfaces/wolframalpha.py +2 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +14 -13
- symai/extended/os_command.py +39 -29
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +51 -43
- symai/extended/packages/sympkg.py +41 -35
- symai/extended/packages/symrun.py +63 -50
- symai/extended/repo_cloner.py +14 -12
- symai/extended/seo_query_optimizer.py +15 -13
- symai/extended/solver.py +116 -91
- symai/extended/summarizer.py +12 -10
- symai/extended/taypan_interpreter.py +17 -18
- symai/extended/vectordb.py +122 -92
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +51 -47
- symai/formatter/regex.py +70 -69
- symai/functional.py +325 -176
- symai/imports.py +190 -147
- symai/interfaces.py +57 -28
- symai/memory.py +45 -35
- symai/menu/screen.py +28 -19
- symai/misc/console.py +66 -56
- symai/misc/loader.py +8 -5
- symai/models/__init__.py +17 -1
- symai/models/base.py +395 -236
- symai/models/errors.py +1 -2
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +24 -25
- symai/ops/primitives.py +1149 -731
- symai/post_processors.py +58 -50
- symai/pre_processors.py +86 -82
- symai/processor.py +21 -13
- symai/prompts.py +764 -685
- symai/server/huggingface_server.py +135 -49
- symai/server/llama_cpp_server.py +21 -11
- symai/server/qdrant_server.py +206 -0
- symai/shell.py +100 -42
- symai/shellsv.py +700 -492
- symai/strategy.py +630 -346
- symai/symbol.py +368 -322
- symai/utils.py +100 -78
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +22 -10
- symbolicai-1.1.0.dist-info/RECORD +168 -0
- symbolicai-0.21.0.dist-info/RECORD +0 -162
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
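
The per-file line counts above can be reproduced approximately by diffing the two wheels locally. The sketch below is illustrative only: the wheel filenames are assumed to follow standard wheel naming, the counting heuristic is a plain difflib line diff, and renamed files (e.g. webscraping → scrape) show up as a removal plus an addition rather than a rename.

    import difflib
    import zipfile

    # Hypothetical local filenames; download the wheels first, e.g.
    #   pip download symbolicai==0.21.0 --no-deps --only-binary=:all:
    #   pip download symbolicai==1.1.0 --no-deps --only-binary=:all:
    OLD_WHL = "symbolicai-0.21.0-py3-none-any.whl"
    NEW_WHL = "symbolicai-1.1.0-py3-none-any.whl"

    def python_sources(path):
        # A wheel is a zip archive; map each .py member to its list of source lines.
        with zipfile.ZipFile(path) as whl:
            return {
                name: whl.read(name).decode("utf-8", errors="replace").splitlines()
                for name in whl.namelist()
                if name.endswith(".py")
            }

    old, new = python_sources(OLD_WHL), python_sources(NEW_WHL)
    for name in sorted(old.keys() | new.keys()):
        added = removed = 0
        for line in difflib.unified_diff(old.get(name, []), new.get(name, []), lineterm=""):
            if line.startswith("+") and not line.startswith("+++"):
                added += 1
            elif line.startswith("-") and not line.startswith("---"):
                removed += 1
        if added or removed:
            print(f"{name} +{added} -{removed}")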

symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py
@@ -7,9 +7,8 @@ import openai
 import tiktoken

 from ....components import SelfPrompt
-from ....misc.console import ConsoleStyle
 from ....symbol import Symbol
-from ....utils import
+from ....utils import UserMessage, encode_media_frames
 from ...base import Engine
 from ...mixin.openai import OpenAIMixin
 from ...settings import SYMAI_CONFIG
@@ -27,17 +26,17 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         self.config = deepcopy(SYMAI_CONFIG)
         # In case we use EngineRepository.register to inject the api_key and model => dynamically change the engine at runtime
         if api_key is not None and model is not None:
-            self.config[
-            self.config[
-        if self.id() !=
-            return
-        openai.api_key = self.config[
-        self.model = self.config[
+            self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] = api_key
+            self.config["NEUROSYMBOLIC_ENGINE_MODEL"] = model
+        if self.id() != "neurosymbolic":
+            return # do not initialize if not neurosymbolic; avoids conflict with llama.cpp check in EngineRepository.register_from_package
+        openai.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]
+        self.model = self.config["NEUROSYMBOLIC_ENGINE_MODEL"]
         self.name = self.__class__.__name__
         try:
             self.tokenizer = tiktoken.encoding_for_model(self.model)
-        except Exception
-            self.tokenizer = tiktoken.get_encoding(
+        except Exception:
+            self.tokenizer = tiktoken.get_encoding("o200k_base")
         self.max_context_tokens = self.api_max_context_tokens()
         self.max_response_tokens = self.api_max_response_tokens()
         self.seed = None
@@ -45,63 +44,71 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         try:
             self.client = openai.Client(api_key=openai.api_key)
         except Exception as e:
-
+            UserMessage(
+                f"Failed to initialize OpenAI client. Please check your OpenAI library version. Caused by: {e}",
+                raise_with=ValueError,
+            )

     def id(self) -> str:
-        if self.config.get(
-
-            self.config.get(
-            self.config.get(
-            self.config.get(
-            self.config.get(
-            self.config.get(
-
-
+        if self.config.get("NEUROSYMBOLIC_ENGINE_MODEL") and (
+            self.config.get("NEUROSYMBOLIC_ENGINE_MODEL").startswith("o1")
+            or self.config.get("NEUROSYMBOLIC_ENGINE_MODEL").startswith("o3")
+            or self.config.get("NEUROSYMBOLIC_ENGINE_MODEL").startswith("o4")
+            or self.config.get("NEUROSYMBOLIC_ENGINE_MODEL") == "gpt-5"
+            or self.config.get("NEUROSYMBOLIC_ENGINE_MODEL") == "gpt-5.1"
+            or self.config.get("NEUROSYMBOLIC_ENGINE_MODEL") == "gpt-5-mini"
+            or self.config.get("NEUROSYMBOLIC_ENGINE_MODEL") == "gpt-5-nano"
+        ):
+            return "neurosymbolic"
+        return super().id() # default to unregistered

     def command(self, *args, **kwargs):
         super().command(*args, **kwargs)
-        if
-            openai.api_key = kwargs[
-        if
-            self.model = kwargs[
-        if
-            self.seed = kwargs[
+        if "NEUROSYMBOLIC_ENGINE_API_KEY" in kwargs:
+            openai.api_key = kwargs["NEUROSYMBOLIC_ENGINE_API_KEY"]
+        if "NEUROSYMBOLIC_ENGINE_MODEL" in kwargs:
+            self.model = kwargs["NEUROSYMBOLIC_ENGINE_MODEL"]
+        if "seed" in kwargs:
+            self.seed = kwargs["seed"]

     def compute_required_tokens(self, messages):
         """Return the number of tokens used by a list of messages."""

         if self.model in {
-
-
-
-
-
-
-
-
+            "o1",
+            "o3",
+            "o3-mini",
+            "o4-mini",
+            "gpt-5",
+            "gpt-5.1",
+            "gpt-5-mini",
+            "gpt-5-nano",
+        }:
             tokens_per_message = 3
             tokens_per_name = 1
         else:
-
+            UserMessage(
                 f"'num_tokens_from_messages()' is not implemented for model {self.model}. "
                 "See https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken for information on how messages are converted to tokens.",
-                raise_with=NotImplementedError
+                raise_with=NotImplementedError,
             )

         num_tokens = 0
         for message in messages:
             num_tokens += tokens_per_message
             for key, value in message.items():
-                if
+                if isinstance(value, str):
                     num_tokens += len(self.tokenizer.encode(value, disallowed_special=()))
                 else:
                     for v in value:
-                        if v[
-                            num_tokens += len(
+                        if v["type"] == "text":
+                            num_tokens += len(
+                                self.tokenizer.encode(v["text"], disallowed_special=())
+                            )
                 if key == "name":
                     num_tokens += tokens_per_name
         num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
-        return num_tokens - 1
+        return num_tokens - 1 # don't know where that extra 1 comes from

     def compute_remaining_tokens(self, prompts: list) -> int:
         val = self.compute_required_tokens(prompts)
@@ -109,35 +116,40 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):

     def _handle_image_content(self, content: str) -> list:
         """Handle image content by processing vision patterns and returning image file data."""
+
         def _extract_pattern(text):
-            pattern = r
+            pattern = r"<<vision:(.*?):>>"
             return re.findall(pattern, text)

         image_files = []
         # pre-process prompt if contains image url
-        if (
-            self.model ==
-            self.model ==
-            self.model ==
+        if (
+            self.model == "o1"
+            or self.model == "gpt-5"
+            or self.model == "gpt-5.1"
+            or self.model == "gpt-5-mini"
+            or self.model == "gpt-5-nano"
+        ) and "<<vision:" in content:
             parts = _extract_pattern(content)
             for p in parts:
                 img_ = p.strip()
-                if img_.startswith(
-                    image_files.append(img_)
-                elif img_.startswith('data:image'):
+                if img_.startswith("http") or img_.startswith("data:image"):
                     image_files.append(img_)
                 else:
                     max_frames_spacing = 50
                     max_used_frames = 10
-                    if img_.startswith(
-                        img_ = img_.replace(
-                        max_used_frames, img_ = img_.split(
+                    if img_.startswith("frames:"):
+                        img_ = img_.replace("frames:", "")
+                        max_used_frames, img_ = img_.split(":")
                     max_used_frames = int(max_used_frames)
                     if max_used_frames < 1 or max_used_frames > max_frames_spacing:
-
+                        UserMessage(
+                            f"Invalid max_used_frames value: {max_used_frames}. Expected value between 1 and {max_frames_spacing}",
+                            raise_with=ValueError,
+                        )
                     buffer, ext = encode_media_frames(img_)
                     if len(buffer) > 1:
-                        step = len(buffer) // max_frames_spacing
+                        step = len(buffer) // max_frames_spacing # max frames spacing
                         frames = []
                         indices = list(range(0, len(buffer), step))[:max_used_frames]
                         for i in indices:
@@ -146,24 +158,95 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
                     elif len(buffer) == 1:
                         image_files.append(f"data:image/{ext};base64,{buffer[0]}")
                     else:
-
+                        UserMessage("No frames found or error in encoding frames")
         return image_files

     def _remove_vision_pattern(self, text: str) -> str:
         """Remove vision patterns from text."""
-        pattern = r
-        return re.sub(pattern,
+        pattern = r"<<vision:(.*?):>>"
+        return re.sub(pattern, "", text)

-    def
-        """
-
-
-        new_len = max(100, new_len) # Ensure minimum token length
-        return tokens[-new_len:] if truncation_type == 'head' else tokens[:new_len] # else 'tail'
+    def _slice_tokens(self, tokens, new_len, truncation_type):
+        """Slice tokens based on truncation type."""
+        new_len = max(100, new_len) # Ensure minimum token length
+        return tokens[-new_len:] if truncation_type == "head" else tokens[:new_len] # else 'tail'

-
+    def _validate_truncation_prompts(self, prompts: list[dict]) -> bool:
+        """Validate prompt structure before truncation."""
+        if len(prompts) != 2 and all(prompt["role"] in ["developer", "user"] for prompt in prompts):
             # Only support developer and user prompts
-
+            UserMessage(
+                f"Token truncation currently supports only two messages, from 'user' and 'developer' (got {len(prompts)}). Returning original prompts."
+            )
+            return False
+        return True
+
+    def _collect_user_tokens(
+        self,
+        user_prompt: dict,
+    ) -> tuple[list[int], bool]:
+        """Collect user tokens and detect unsupported content."""
+        user_tokens: list[int] = []
+        user_content = user_prompt["content"]
+        if isinstance(user_content, str):
+            user_tokens.extend(Symbol(user_content).tokens)
+            return user_tokens, False
+        if isinstance(user_content, list):
+            for content_item in user_content:
+                if isinstance(content_item, dict):
+                    if content_item.get("type") == "text":
+                        user_tokens.extend(Symbol(content_item["text"]).tokens)
+                    else:
+                        return user_tokens, True
+                else:
+                    UserMessage(
+                        f"Invalid content type: {type(content_item)}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python",
+                        raise_with=ValueError,
+                    )
+            return user_tokens, False
+        return UserMessage(
+            f"Unknown content type: {type(user_prompt['content'])}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python",
+            raise_with=ValueError,
+        )
+
+    def _truncate_single_prompt_exceed(
+        self,
+        system_tokens,
+        user_tokens,
+        system_token_count,
+        user_token_count,
+        max_prompt_tokens,
+        truncation_type,
+    ):
+        """Handle truncation when only one prompt exceeds the limit."""
+        half_limit = max_prompt_tokens / 2
+        if user_token_count > half_limit and system_token_count <= half_limit:
+            new_user_len = max_prompt_tokens - system_token_count
+            new_user_tokens = self._slice_tokens(user_tokens, new_user_len, truncation_type)
+            return [
+                {"role": "developer", "content": self.tokenizer.decode(system_tokens)},
+                {
+                    "role": "user",
+                    "content": [{"type": "text", "text": self.tokenizer.decode(new_user_tokens)}],
+                },
+            ]
+        if system_token_count > half_limit and user_token_count <= half_limit:
+            new_system_len = max_prompt_tokens - user_token_count
+            new_system_tokens = self._slice_tokens(system_tokens, new_system_len, truncation_type)
+            return [
+                {"role": "developer", "content": self.tokenizer.decode(new_system_tokens)},
+                {
+                    "role": "user",
+                    "content": [{"type": "text", "text": self.tokenizer.decode(user_tokens)}],
+                },
+            ]
+        return None
+
+    def truncate(
+        self, prompts: list[dict], truncation_percentage: float | None, truncation_type: str
+    ) -> list[dict]:
+        """Main truncation method"""
+        if not self._validate_truncation_prompts(prompts):
             return prompts

         if truncation_percentage is None:
@@ -174,26 +257,12 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         user_prompt = prompts[1]

         # Get token counts
-        system_tokens = Symbol(system_prompt[
+        system_tokens = Symbol(system_prompt["content"]).tokens
         user_tokens = []

-
-
-
-        elif isinstance(user_prompt['content'], list):
-            for content_item in user_prompt['content']:
-                # Image input format
-                if isinstance(content_item, dict):
-                    if content_item.get('type') == 'text':
-                        user_tokens.extend(Symbol(content_item['text']).tokens)
-                    else:
-                        # Image content; return original since not supported
-                        return prompts
-                else:
-                    CustomUserWarning(f"Invalid content type: {type(content_item)}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python", raise_with=ValueError)
-        else:
-            # Unknown input format
-            CustomUserWarning(f"Unknown content type: {type(user_prompt['content'])}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python", raise_with=ValueError)
+        user_tokens, should_return_original = self._collect_user_tokens(user_prompt)
+        if should_return_original:
+            return prompts

         system_token_count = len(system_tokens)
         user_token_count = len(user_tokens)
@@ -208,33 +277,26 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         if total_tokens <= max_prompt_tokens:
             return prompts

-
+        UserMessage(
             f"Executing {truncation_type} truncation to fit within {max_prompt_tokens} tokens. "
             f"Combined prompts ({total_tokens} tokens) exceed maximum allowed tokens "
-            f"of {max_prompt_tokens} ({truncation_percentage*100:.1f}% of context). "
+            f"of {max_prompt_tokens} ({truncation_percentage * 100:.1f}% of context). "
             f"You can control this behavior by setting 'truncation_percentage' (current: {truncation_percentage:.2f}) "
             f"and 'truncation_type' (current: '{truncation_type}') parameters. "
             f"Set 'truncation_percentage=1.0' to deactivate truncation (will fail if exceeding context window). "
             f"Choose 'truncation_type' as 'head' to keep the end of prompts or 'tail' to keep the beginning."
         )

-
-
-
-
-
-
-
-
-
-
-        if system_token_count > max_prompt_tokens/2 and user_token_count <= max_prompt_tokens/2:
-            new_system_len = max_prompt_tokens - user_token_count
-            new_system_tokens = _slice_tokens(system_tokens, new_system_len, truncation_type)
-            return [
-                {'role': 'developer', 'content': self.tokenizer.decode(new_system_tokens)},
-                {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(user_tokens)}]}
-            ]
+        single_prompt_adjustment = self._truncate_single_prompt_exceed(
+            system_tokens,
+            user_tokens,
+            system_token_count,
+            user_token_count,
+            max_prompt_tokens,
+            truncation_type,
+        )
+        if single_prompt_adjustment is not None:
+            return single_prompt_adjustment

         # Case 3: Both exceed - reduce proportionally
         system_ratio = system_token_count / total_tokens
@@ -246,43 +308,53 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         new_system_len += distribute_tokens // 2
         new_user_len += distribute_tokens // 2

-        new_system_tokens = _slice_tokens(system_tokens, new_system_len, truncation_type)
-        new_user_tokens = _slice_tokens(user_tokens, new_user_len, truncation_type)
+        new_system_tokens = self._slice_tokens(system_tokens, new_system_len, truncation_type)
+        new_user_tokens = self._slice_tokens(user_tokens, new_user_len, truncation_type)

         return [
-            {
-            {
+            {"role": "developer", "content": self.tokenizer.decode(new_system_tokens)},
+            {
+                "role": "user",
+                "content": [{"type": "text", "text": self.tokenizer.decode(new_user_tokens)}],
+            },
         ]

     def forward(self, argument):
         kwargs = argument.kwargs
-        truncation_percentage = kwargs.get(
-
-
+        truncation_percentage = kwargs.get(
+            "truncation_percentage", argument.prop.truncation_percentage
+        )
+        truncation_type = kwargs.get("truncation_type", argument.prop.truncation_type)
+        messages = self.truncate(
+            argument.prop.prepared_input, truncation_percentage, truncation_type
+        )
         payload = self._prepare_request_payload(messages, argument)
-        except_remedy = kwargs.get(
+        except_remedy = kwargs.get("except_remedy")

         try:
             res = self.client.chat.completions.create(**payload)

         except Exception as e:
-            if openai.api_key is None or openai.api_key ==
-                msg =
-
-                if
-
-
+            if openai.api_key is None or openai.api_key == "":
+                msg = "OpenAI API key is not set. Please set it in the config file or pass it as an argument to the command method."
+                UserMessage(msg)
+                if (
+                    self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] is None
+                    or self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] == ""
+                ):
+                    UserMessage(msg, raise_with=ValueError)
+                openai.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]

             callback = self.client.chat.completions.create
-            kwargs[
+            kwargs["model"] = kwargs.get("model", self.model)

             if except_remedy is not None:
                 res = except_remedy(self, e, callback, argument)
             else:
-
+                UserMessage(f"Error during generation. Caused by: {e}", raise_with=ValueError)

-        metadata = {
-        if payload.get(
+        metadata = {"raw_output": res}
+        if payload.get("tools"):
             metadata = self._process_function_calls(res, metadata)
         output = [r.message.content for r in res.choices]

@@ -290,171 +362,262 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):

     def _prepare_raw_input(self, argument):
         if not argument.prop.processed_input:
-
+            UserMessage(
+                "Need to provide a prompt instruction to the engine if raw_input is enabled.",
+                raise_with=ValueError,
+            )
         value = argument.prop.processed_input
         # convert to dict if not already
-        if
-            if
-                value = {
+        if not isinstance(value, list):
+            if not isinstance(value, dict):
+                value = {"role": "user", "content": str(value)}
             value = [value]
         return value

-    def
-        if
-            argument.prop.prepared_input = self._prepare_raw_input(argument)
-            return
-
-        _non_verbose_output = """<META_INSTRUCTION/>\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n\n"""
-        user: str = ""
-        developer: str = ""
-
+    def _non_verbose_section(self, argument) -> str:
+        """Return non-verbose instruction section if needed."""
         if argument.prop.suppress_verbose_output:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            return (
+                "<META_INSTRUCTION/>\n"
+                "You do not output anything else, like verbose preambles or post explanation, such as "
+                '"Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. '
+                "Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use "
+                "indentation, etc. Never add meta instructions information to your output!\n\n"
+            )
+        return ""
+
+    def _response_format_section(self, argument) -> str:
+        """Return response format instructions if provided."""
+        if not argument.prop.response_format:
+            return ""
+        response_format = argument.prop.response_format
+        assert response_format.get("type") is not None, (
+            'Expected format `{ "type": "json_object" }`! See https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format'
+        )
+        if response_format["type"] == "json_object":
+            return "<RESPONSE_FORMAT/>\nYou are a helpful assistant designed to output JSON.\n\n"
+        return ""
+
+    def _context_sections(self, argument) -> list[str]:
+        """Return static and dynamic context sections."""
+        sections: list[str] = []
+        static_ctxt, dyn_ctxt = argument.prop.instance.global_context
         if len(static_ctxt) > 0:
-
-
+            sections.append(f"<STATIC CONTEXT/>\n{static_ctxt}\n\n")
         if len(dyn_ctxt) > 0:
-
+            sections.append(f"<DYNAMIC CONTEXT/>\n{dyn_ctxt}\n\n")
+        return sections

-
+    def _additional_context_section(self, argument) -> str:
+        """Return additional payload context if any."""
         if argument.prop.payload:
-
+            return f"<ADDITIONAL CONTEXT/>\n{argument.prop.payload!s}\n\n"
+        return ""

+    def _examples_section(self, argument) -> str:
+        """Return examples section if provided."""
         examples: list[str] = argument.prop.examples
         if examples and len(examples) > 0:
-
-
-
+            return f"<EXAMPLES/>\n{examples!s}\n\n"
+        return ""
+
+    def _instruction_section(self, argument, image_files: list[str]) -> str:
+        """Return instruction section, removing vision patterns when needed."""
+        prompt = argument.prop.prompt
+        if prompt is None or len(prompt) == 0:
+            return ""
+        value = str(prompt)
+        if len(image_files) > 0:
+            value = self._remove_vision_pattern(value)
+        return f"<INSTRUCTION/>\n{value}\n\n"
+
+    def _build_developer_prompt(self, argument, image_files: list[str]) -> str:
+        """Assemble developer prompt content."""
+        developer = self._non_verbose_section(argument)
+        developer = f"{developer}\n" if developer else ""
+
+        parts = [
+            self._response_format_section(argument),
+            *self._context_sections(argument),
+            self._additional_context_section(argument),
+            self._examples_section(argument),
+            self._instruction_section(argument, image_files),
+        ]
+        developer += "".join(part for part in parts if part)

-        if argument.prop.
-
-
-
-
+        if argument.prop.template_suffix:
+            developer += (
+                f" You will only generate content for the placeholder `{argument.prop.template_suffix!s}` "
+                "following the instructions and the provided context information.\n\n"
+            )
+        return developer

+    def _build_user_suffix(self, argument, image_files: list[str]) -> str:
+        """Prepare user content suffix."""
         suffix: str = str(argument.prop.processed_input)
         if len(image_files) > 0:
             suffix = self._remove_vision_pattern(suffix)
+        return suffix

-
+    def _construct_user_prompt(self, user_text: str, image_files: list[str]):
+        """Construct user prompt payload."""
+        if self.model in {
+            "o1",
+            "o3",
+            "o3-mini",
+            "o4-mini",
+            "gpt-5",
+            "gpt-5.1",
+            "gpt-5-mini",
+            "gpt-5-nano",
+        }:
+            images = [{"type": "image_url", "image_url": {"url": file}} for file in image_files]
+            user_prompt = {
+                "role": "user",
+                "content": [
+                    *images,
+                    {"type": "text", "text": user_text},
+                ],
+            }
+            return user_prompt, images
+        return {"role": "user", "content": user_text}, None
+
+    def _apply_self_prompt(
+        self,
+        argument,
+        user_prompt,
+        developer: str,
+        user_text: str,
+        images,
+        image_files: list[str],
+    ):
+        """Apply self-prompting when requested."""
+        instance = argument.prop.instance
+        if not (instance._kwargs.get("self_prompt", False) or argument.prop.self_prompt):
+            return user_prompt, developer
+
+        self_prompter = SelfPrompt()
+        res = self_prompter({"user": user_text, "developer": developer})
+        if res is None:
+            UserMessage("Self-prompting failed!", raise_with=ValueError)

-        if
-
-
-
-
-
-
-
-
+        if len(image_files) > 0:
+            image_content = (
+                images
+                if images is not None
+                else [{"type": "image_url", "image_url": {"url": file}} for file in image_files]
+            )
+            user_prompt = {
+                "role": "user",
+                "content": [
+                    *image_content,
+                    {"type": "text", "text": res["user"]},
+                ],
+            }
         else:
-            user_prompt = {
+            user_prompt = {"role": "user", "content": res["user"]}

-
-        if argument.prop.instance._kwargs.get('self_prompt', False) or argument.prop.self_prompt:
-            self_prompter = SelfPrompt()
-            res = self_prompter({'user': user, 'developer': developer})
-            if res is None:
-                CustomUserWarning("Self-prompting failed!", raise_with=ValueError)
+        return user_prompt, res["developer"]

-
-
-
-
-
-
-            user_prompt = { "role": "user", "content": res['user'] }
+    def prepare(self, argument):
+        if argument.prop.raw_input:
+            argument.prop.prepared_input = self._prepare_raw_input(argument)
+            return
+
+        image_files = self._handle_image_content(str(argument.prop.processed_input))

-
+        developer = self._build_developer_prompt(argument, image_files)
+        user_text = self._build_user_suffix(argument, image_files)
+        user_prompt, images = self._construct_user_prompt(user_text, image_files)
+        user_prompt, developer = self._apply_self_prompt(
+            argument,
+            user_prompt,
+            developer,
+            user_text,
+            images,
+            image_files,
+        )

         argument.prop.prepared_input = [
-            {
+            {"role": "developer", "content": developer},
             user_prompt,
         ]

     def _process_function_calls(self, res, metadata):
         hit = False
-        if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if (
+            hasattr(res, "choices")
+            and res.choices
+            and hasattr(res.choices[0], "message")
+            and res.choices[0].message
+            and hasattr(res.choices[0].message, "tool_calls")
+            and res.choices[0].message.tool_calls
+        ):
+            for tool_call in res.choices[0].message.tool_calls:
+                if hit:
+                    UserMessage(
+                        "Multiple function calls detected in the response but only the first one will be processed."
+                    )
+                    break
+                if hasattr(tool_call, "function") and tool_call.function:
+                    try:
+                        args_dict = json.loads(tool_call.function.arguments)
+                    except json.JSONDecodeError:
+                        args_dict = {}
+                    metadata["function_call"] = {
+                        "name": tool_call.function.name,
+                        "arguments": args_dict,
+                    }
+                    hit = True
         return metadata

     def _prepare_request_payload(self, messages, argument):
         """Prepares the request payload from the argument."""
         kwargs = argument.kwargs

-        max_tokens = kwargs.get(
-        max_completion_tokens = kwargs.get(
+        max_tokens = kwargs.get("max_tokens", None)
+        max_completion_tokens = kwargs.get("max_completion_tokens", None)
         remaining_tokens = self.compute_remaining_tokens(messages)

         if max_tokens is not None:
-
+            UserMessage(
                 "'max_tokens' is now deprecated in favor of 'max_completion_tokens', and is not compatible with o1 series models. "
                 "We handle this conversion by default for you for now but we won't in the future. "
                 "See: https://platform.openai.com/docs/api-reference/chat/create"
             )
             if max_tokens > self.max_response_tokens:
-
+                UserMessage(
                     f"Provided 'max_tokens' ({max_tokens}) exceeds max response tokens ({self.max_response_tokens}). "
                     f"Truncating to {remaining_tokens} to avoid API failure."
                 )
-                kwargs[
+                kwargs["max_completion_tokens"] = remaining_tokens
             else:
-                kwargs[
-            del kwargs[
+                kwargs["max_completion_tokens"] = max_tokens
+            del kwargs["max_tokens"]

-        if max_completion_tokens is not None:
-
-
-
-
-
-            kwargs['max_completion_tokens'] = remaining_tokens
+        if max_completion_tokens is not None and max_completion_tokens > self.max_response_tokens:
+            UserMessage(
+                f"Provided 'max_completion_tokens' ({max_completion_tokens}) exceeds max response tokens ({self.max_response_tokens}). "
+                f"Truncating to {remaining_tokens} to avoid API failure."
+            )
+            kwargs["max_completion_tokens"] = remaining_tokens

         payload = {
             "messages": messages,
-            "model": kwargs.get(
-            "seed": kwargs.get(
-            "reasoning_effort": kwargs.get(
-            "max_completion_tokens": kwargs.get(
-            "stop": kwargs.get(
-            "temperature": kwargs.get(
-            "frequency_penalty": kwargs.get(
-            "presence_penalty": kwargs.get(
-            "top_p": kwargs.get(
-            "n": kwargs.get(
-            "logit_bias": kwargs.get(
-            "tools": kwargs.get(
-            "tool_choice": kwargs.get(
-            "response_format": kwargs.get(
+            "model": kwargs.get("model", self.model),
+            "seed": kwargs.get("seed", self.seed),
+            "reasoning_effort": kwargs.get("reasoning_effort", "medium"),
+            "max_completion_tokens": kwargs.get("max_completion_tokens"),
+            "stop": kwargs.get("stop", ""),
+            "temperature": kwargs.get("temperature", 1),
+            "frequency_penalty": kwargs.get("frequency_penalty", 0),
+            "presence_penalty": kwargs.get("presence_penalty", 0),
+            "top_p": kwargs.get("top_p", 1),
+            "n": kwargs.get("n", 1),
+            "logit_bias": kwargs.get("logit_bias"),
+            "tools": kwargs.get("tools"),
+            "tool_choice": kwargs.get("tool_choice"),
+            "response_format": kwargs.get("response_format"),
         }

         if self.model == "o4-mini" or self.model == "o3":