symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +96 -64
- symai/backend/base.py +93 -80
- symai/backend/engines/drawing/engine_bfl.py +12 -11
- symai/backend/engines/drawing/engine_gpt_image.py +108 -87
- symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
- symai/backend/engines/embedding/engine_openai.py +3 -5
- symai/backend/engines/execute/engine_python.py +6 -5
- symai/backend/engines/files/engine_io.py +74 -67
- symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
- symai/backend/engines/index/engine_pinecone.py +23 -24
- symai/backend/engines/index/engine_vectordb.py +16 -14
- symai/backend/engines/lean/engine_lean4.py +38 -34
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
- symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
- symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
- symai/backend/engines/ocr/engine_apilayer.py +6 -8
- symai/backend/engines/output/engine_stdout.py +1 -4
- symai/backend/engines/search/engine_openai.py +7 -7
- symai/backend/engines/search/engine_perplexity.py +5 -5
- symai/backend/engines/search/engine_serpapi.py +12 -14
- symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
- symai/backend/engines/text_to_speech/engine_openai.py +5 -7
- symai/backend/engines/text_vision/engine_clip.py +7 -11
- symai/backend/engines/userinput/engine_console.py +3 -3
- symai/backend/engines/webscraping/engine_requests.py +81 -48
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +4 -2
- symai/backend/mixin/deepseek.py +2 -0
- symai/backend/mixin/google.py +2 -0
- symai/backend/mixin/openai.py +11 -3
- symai/backend/settings.py +83 -16
- symai/chat.py +101 -78
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +77 -69
- symai/collect/pipeline.py +35 -27
- symai/collect/stats.py +75 -63
- symai/components.py +198 -169
- symai/constraints.py +15 -12
- symai/core.py +698 -359
- symai/core_ext.py +32 -34
- symai/endpoints/api.py +80 -73
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +11 -8
- symai/extended/arxiv_pdf_parser.py +13 -12
- symai/extended/bibtex_parser.py +2 -3
- symai/extended/conversation.py +101 -90
- symai/extended/document.py +17 -10
- symai/extended/file_merger.py +18 -13
- symai/extended/graph.py +18 -13
- symai/extended/html_style_template.py +2 -4
- symai/extended/interfaces/blip_2.py +1 -2
- symai/extended/interfaces/clip.py +1 -2
- symai/extended/interfaces/console.py +7 -1
- symai/extended/interfaces/dall_e.py +1 -1
- symai/extended/interfaces/flux.py +1 -1
- symai/extended/interfaces/gpt_image.py +1 -1
- symai/extended/interfaces/input.py +1 -1
- symai/extended/interfaces/llava.py +0 -1
- symai/extended/interfaces/naive_vectordb.py +7 -8
- symai/extended/interfaces/naive_webscraping.py +1 -1
- symai/extended/interfaces/ocr.py +1 -1
- symai/extended/interfaces/pinecone.py +6 -5
- symai/extended/interfaces/serpapi.py +1 -1
- symai/extended/interfaces/terminal.py +2 -3
- symai/extended/interfaces/tts.py +1 -1
- symai/extended/interfaces/whisper.py +1 -1
- symai/extended/interfaces/wolframalpha.py +1 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +11 -13
- symai/extended/os_command.py +17 -16
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +19 -16
- symai/extended/packages/sympkg.py +12 -9
- symai/extended/packages/symrun.py +21 -19
- symai/extended/repo_cloner.py +11 -10
- symai/extended/seo_query_optimizer.py +1 -2
- symai/extended/solver.py +20 -23
- symai/extended/summarizer.py +4 -3
- symai/extended/taypan_interpreter.py +10 -12
- symai/extended/vectordb.py +99 -82
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +12 -16
- symai/formatter/regex.py +62 -63
- symai/functional.py +176 -122
- symai/imports.py +136 -127
- symai/interfaces.py +56 -27
- symai/memory.py +14 -13
- symai/misc/console.py +49 -39
- symai/misc/loader.py +5 -3
- symai/models/__init__.py +17 -1
- symai/models/base.py +269 -181
- symai/models/errors.py +0 -1
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +11 -15
- symai/ops/primitives.py +348 -228
- symai/post_processors.py +32 -28
- symai/pre_processors.py +39 -41
- symai/processor.py +6 -4
- symai/prompts.py +59 -45
- symai/server/huggingface_server.py +23 -20
- symai/server/llama_cpp_server.py +7 -5
- symai/shell.py +3 -4
- symai/shellsv.py +499 -375
- symai/strategy.py +517 -287
- symai/symbol.py +111 -116
- symai/utils.py +42 -36
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
- symbolicai-1.0.0.dist-info/RECORD +163 -0
- symbolicai-0.20.2.dist-info/RECORD +0 -162
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py

```diff
@@ -7,9 +7,8 @@ import openai
 import tiktoken
 
 from ....components import SelfPrompt
-from ....misc.console import ConsoleStyle
 from ....symbol import Symbol
-from ....utils import
+from ....utils import UserMessage, encode_media_frames
 from ...base import Engine
 from ...mixin.openai import OpenAIMixin
 from ...settings import SYMAI_CONFIG
@@ -36,7 +35,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         self.name = self.__class__.__name__
         try:
             self.tokenizer = tiktoken.encoding_for_model(self.model)
-        except Exception
+        except Exception:
             self.tokenizer = tiktoken.get_encoding('o200k_base')
         self.max_context_tokens = self.api_max_context_tokens()
         self.max_response_tokens = self.api_max_response_tokens()
@@ -45,7 +44,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         try:
            self.client = openai.Client(api_key=openai.api_key)
         except Exception as e:
-
+            UserMessage(f'Failed to initialize OpenAI client. Please check your OpenAI library version. Caused by: {e}', raise_with=ValueError)
 
     def id(self) -> str:
         if self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
```
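Throughout this file the release swaps the previous warning helper for `UserMessage` imported from `symai/utils.py`, passing `raise_with=<ExceptionType>` wherever the message should abort execution. The helper's exact behaviour is not shown in this diff; a minimal sketch of the assumed contract (warn by default, raise when asked) looks like this:

```python
import warnings


def user_message(message: str, raise_with: type[Exception] | None = None) -> None:
    """Hypothetical stand-in for symai.utils.UserMessage: warn by default,
    escalate to the given exception type when raise_with is provided."""
    if raise_with is not None:
        raise raise_with(message)
    warnings.warn(message, UserWarning, stacklevel=2)


# user_message("OpenAI API key is not set.")                           # warns only
# user_message("Failed to initialize client.", raise_with=ValueError)  # raises ValueError
```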
```diff
@@ -82,7 +81,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
             tokens_per_message = 3
             tokens_per_name = 1
         else:
-
+            UserMessage(
                 f"'num_tokens_from_messages()' is not implemented for model {self.model}. "
                 "See https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken for information on how messages are converted to tokens.",
                 raise_with=NotImplementedError
@@ -92,7 +91,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         for message in messages:
             num_tokens += tokens_per_message
             for key, value in message.items():
-                if
+                if isinstance(value, str):
                     num_tokens += len(self.tokenizer.encode(value, disallowed_special=()))
                 else:
                     for v in value:
@@ -122,9 +121,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         parts = _extract_pattern(content)
         for p in parts:
             img_ = p.strip()
-            if img_.startswith('http'):
-                image_files.append(img_)
-            elif img_.startswith('data:image'):
+            if img_.startswith('http') or img_.startswith('data:image'):
                 image_files.append(img_)
             else:
                 max_frames_spacing = 50
@@ -134,7 +131,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
                     max_used_frames, img_ = img_.split(':')
                     max_used_frames = int(max_used_frames)
                     if max_used_frames < 1 or max_used_frames > max_frames_spacing:
-
+                        UserMessage(f"Invalid max_used_frames value: {max_used_frames}. Expected value between 1 and {max_frames_spacing}", raise_with=ValueError)
                 buffer, ext = encode_media_frames(img_)
                 if len(buffer) > 1:
                     step = len(buffer) // max_frames_spacing # max frames spacing
@@ -146,7 +143,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
                 elif len(buffer) == 1:
                     image_files.append(f"data:image/{ext};base64,{buffer[0]}")
                 else:
-
+                    UserMessage('No frames found or error in encoding frames')
         return image_files
 
     def _remove_vision_pattern(self, text: str) -> str:
```
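The vision handling above keys off `<<vision:...:>>` markers and now routes `http` URLs and `data:image` URIs through a single branch. A small self-contained sketch of that marker handling, reusing the regex shown in `_remove_vision_pattern` (the helper names below are illustrative, not part of the package):

```python
import re

VISION_PATTERN = r'<<vision:(.*?):>>'  # same pattern the engine strips from prompts


def extract_vision_refs(text: str) -> list[str]:
    """Collect image references embedded as <<vision:...:>> markers."""
    refs = [p.strip() for p in re.findall(VISION_PATTERN, text)]
    # URLs and already-encoded data URIs are used as-is; anything else would
    # go through frame encoding in the real engine.
    return [r for r in refs if r.startswith('http') or r.startswith('data:image')]


def remove_vision_refs(text: str) -> str:
    """Strip the markers so only plain text reaches the prompt."""
    return re.sub(VISION_PATTERN, '', text)


assert extract_vision_refs('see <<vision:https://x.test/cat.png:>> please') == ['https://x.test/cat.png']
assert remove_vision_refs('see <<vision:https://x.test/cat.png:>> please') == 'see  please'
```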
```diff
@@ -154,16 +151,79 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         pattern = r'<<vision:(.*?):>>'
         return re.sub(pattern, '', text)
 
-    def
-        """
-
-
-        new_len = max(100, new_len) # Ensure minimum token length
-        return tokens[-new_len:] if truncation_type == 'head' else tokens[:new_len] # else 'tail'
+    def _slice_tokens(self, tokens, new_len, truncation_type):
+        """Slice tokens based on truncation type."""
+        new_len = max(100, new_len) # Ensure minimum token length
+        return tokens[-new_len:] if truncation_type == 'head' else tokens[:new_len] # else 'tail'
 
+    def _validate_truncation_prompts(self, prompts: list[dict]) -> bool:
+        """Validate prompt structure before truncation."""
         if len(prompts) != 2 and all(prompt['role'] in ['developer', 'user'] for prompt in prompts):
             # Only support developer and user prompts
-
+            UserMessage(
+                f"Token truncation currently supports only two messages, from 'user' and 'developer' (got {len(prompts)}). Returning original prompts."
+            )
+            return False
+        return True
+
+    def _collect_user_tokens(
+        self,
+        user_prompt: dict,
+    ) -> tuple[list[int], bool]:
+        """Collect user tokens and detect unsupported content."""
+        user_tokens: list[int] = []
+        user_content = user_prompt['content']
+        if isinstance(user_content, str):
+            user_tokens.extend(Symbol(user_content).tokens)
+            return user_tokens, False
+        if isinstance(user_content, list):
+            for content_item in user_content:
+                if isinstance(content_item, dict):
+                    if content_item.get('type') == 'text':
+                        user_tokens.extend(Symbol(content_item['text']).tokens)
+                    else:
+                        return user_tokens, True
+                else:
+                    UserMessage(
+                        f"Invalid content type: {type(content_item)}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python",
+                        raise_with=ValueError,
+                    )
+            return user_tokens, False
+        return UserMessage(
+            f"Unknown content type: {type(user_prompt['content'])}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python",
+            raise_with=ValueError,
+        )
+
+    def _truncate_single_prompt_exceed(
+        self,
+        system_tokens,
+        user_tokens,
+        system_token_count,
+        user_token_count,
+        max_prompt_tokens,
+        truncation_type,
+    ):
+        """Handle truncation when only one prompt exceeds the limit."""
+        half_limit = max_prompt_tokens / 2
+        if user_token_count > half_limit and system_token_count <= half_limit:
+            new_user_len = max_prompt_tokens - system_token_count
+            new_user_tokens = self._slice_tokens(user_tokens, new_user_len, truncation_type)
+            return [
+                {'role': 'developer', 'content': self.tokenizer.decode(system_tokens)},
+                {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(new_user_tokens)}]},
+            ]
+        if system_token_count > half_limit and user_token_count <= half_limit:
+            new_system_len = max_prompt_tokens - user_token_count
+            new_system_tokens = self._slice_tokens(system_tokens, new_system_len, truncation_type)
+            return [
+                {'role': 'developer', 'content': self.tokenizer.decode(new_system_tokens)},
+                {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(user_tokens)}]},
+            ]
+        return None
+
+    def truncate(self, prompts: list[dict], truncation_percentage: float | None, truncation_type: str) -> list[dict]:
+        """Main truncation method"""
+        if not self._validate_truncation_prompts(prompts):
             return prompts
 
         if truncation_percentage is None:
@@ -177,23 +237,9 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         system_tokens = Symbol(system_prompt['content']).tokens
         user_tokens = []
 
-
-
-
-        elif isinstance(user_prompt['content'], list):
-            for content_item in user_prompt['content']:
-                # Image input format
-                if isinstance(content_item, dict):
-                    if content_item.get('type') == 'text':
-                        user_tokens.extend(Symbol(content_item['text']).tokens)
-                    else:
-                        # Image content; return original since not supported
-                        return prompts
-                else:
-                    CustomUserWarning(f"Invalid content type: {type(content_item)}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python", raise_with=ValueError)
-        else:
-            # Unknown input format
-            CustomUserWarning(f"Unknown content type: {type(user_prompt['content'])}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python", raise_with=ValueError)
+        user_tokens, should_return_original = self._collect_user_tokens(user_prompt)
+        if should_return_original:
+            return prompts
 
         system_token_count = len(system_tokens)
         user_token_count = len(user_tokens)
@@ -208,7 +254,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         if total_tokens <= max_prompt_tokens:
             return prompts
 
-
+        UserMessage(
             f"Executing {truncation_type} truncation to fit within {max_prompt_tokens} tokens. "
             f"Combined prompts ({total_tokens} tokens) exceed maximum allowed tokens "
            f"of {max_prompt_tokens} ({truncation_percentage*100:.1f}% of context). "
@@ -218,23 +264,16 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
             f"Choose 'truncation_type' as 'head' to keep the end of prompts or 'tail' to keep the beginning."
         )
 
-
-
-
-
-
-
-
-
-
-
-        if system_token_count > max_prompt_tokens/2 and user_token_count <= max_prompt_tokens/2:
-            new_system_len = max_prompt_tokens - user_token_count
-            new_system_tokens = _slice_tokens(system_tokens, new_system_len, truncation_type)
-            return [
-                {'role': 'developer', 'content': self.tokenizer.decode(new_system_tokens)},
-                {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(user_tokens)}]}
-            ]
+        single_prompt_adjustment = self._truncate_single_prompt_exceed(
+            system_tokens,
+            user_tokens,
+            system_token_count,
+            user_token_count,
+            max_prompt_tokens,
+            truncation_type,
+        )
+        if single_prompt_adjustment is not None:
+            return single_prompt_adjustment
 
         # Case 3: Both exceed - reduce proportionally
         system_ratio = system_token_count / total_tokens
@@ -246,8 +285,8 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         new_system_len += distribute_tokens // 2
         new_user_len += distribute_tokens // 2
 
-        new_system_tokens = _slice_tokens(system_tokens, new_system_len, truncation_type)
-        new_user_tokens = _slice_tokens(user_tokens, new_user_len, truncation_type)
+        new_system_tokens = self._slice_tokens(system_tokens, new_system_len, truncation_type)
+        new_user_tokens = self._slice_tokens(user_tokens, new_user_len, truncation_type)
 
         return [
             {'role': 'developer', 'content': self.tokenizer.decode(new_system_tokens)},
```
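The truncation refactor moves the slicing rule into `_slice_tokens`: 'head' truncation keeps the end of the token list, 'tail' keeps the beginning, and the result never drops below 100 tokens. A standalone mirror of that rule, useful for checking the semantics with plain token-id lists:

```python
def slice_tokens(tokens: list[int], new_len: int, truncation_type: str) -> list[int]:
    """Mirror of the engine's _slice_tokens: keep the tail for 'head' truncation,
    the head for 'tail' truncation, never fewer than 100 tokens."""
    new_len = max(100, new_len)
    return tokens[-new_len:] if truncation_type == 'head' else tokens[:new_len]


tokens = list(range(500))
assert slice_tokens(tokens, 120, 'head') == tokens[-120:]   # drop the beginning
assert slice_tokens(tokens, 120, 'tail') == tokens[:120]    # drop the end
assert len(slice_tokens(tokens, 10, 'tail')) == 100         # floor of 100 tokens
```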
```diff
@@ -268,18 +307,18 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         except Exception as e:
             if openai.api_key is None or openai.api_key == '':
                 msg = 'OpenAI API key is not set. Please set it in the config file or pass it as an argument to the command method.'
-
+                UserMessage(msg)
                 if self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] is None or self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] == '':
-
+                    UserMessage(msg, raise_with=ValueError)
                 openai.api_key = self.config['NEUROSYMBOLIC_ENGINE_API_KEY']
 
             callback = self.client.chat.completions.create
-            kwargs['model'] = kwargs
+            kwargs['model'] = kwargs.get('model', self.model)
 
             if except_remedy is not None:
                 res = except_remedy(self, e, callback, argument)
             else:
-
+                UserMessage(f'Error during generation. Caused by: {e}', raise_with=ValueError)
 
         metadata = {'raw_output': res}
         if payload.get('tools'):
```
```diff
@@ -290,95 +329,173 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
 
     def _prepare_raw_input(self, argument):
         if not argument.prop.processed_input:
-
+            UserMessage('Need to provide a prompt instruction to the engine if raw_input is enabled.', raise_with=ValueError)
         value = argument.prop.processed_input
         # convert to dict if not already
-        if
-            if
+        if not isinstance(value, list):
+            if not isinstance(value, dict):
                 value = {'role': 'user', 'content': str(value)}
             value = [value]
         return value
 
-    def
-        if
-            argument.prop.prepared_input = self._prepare_raw_input(argument)
-            return
-
-        _non_verbose_output = """<META_INSTRUCTION/>\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n\n"""
-        user: str = ""
-        developer: str = ""
-
+    def _non_verbose_section(self, argument) -> str:
+        """Return non-verbose instruction section if needed."""
         if argument.prop.suppress_verbose_output:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            return (
+                "<META_INSTRUCTION/>\n"
+                "You do not output anything else, like verbose preambles or post explanation, such as "
+                "\"Sure, let me...\", \"Hope that was helpful...\", \"Yes, I can help you with that...\", etc. "
+                "Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use "
+                "indentation, etc. Never add meta instructions information to your output!\n\n"
+            )
+        return ''
+
+    def _response_format_section(self, argument) -> str:
+        """Return response format instructions if provided."""
+        if not argument.prop.response_format:
+            return ''
+        response_format = argument.prop.response_format
+        assert response_format.get('type') is not None, 'Expected format `{ "type": "json_object" }`! See https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format'
+        if response_format["type"] == "json_object":
+            return '<RESPONSE_FORMAT/>\nYou are a helpful assistant designed to output JSON.\n\n'
+        return ''
+
+    def _context_sections(self, argument) -> list[str]:
+        """Return static and dynamic context sections."""
+        sections: list[str] = []
+        static_ctxt, dyn_ctxt = argument.prop.instance.global_context
         if len(static_ctxt) > 0:
-
-
+            sections.append(f"<STATIC CONTEXT/>\n{static_ctxt}\n\n")
         if len(dyn_ctxt) > 0:
-
+            sections.append(f"<DYNAMIC CONTEXT/>\n{dyn_ctxt}\n\n")
+        return sections
 
-
+    def _additional_context_section(self, argument) -> str:
+        """Return additional payload context if any."""
         if argument.prop.payload:
-
+            return f"<ADDITIONAL CONTEXT/>\n{argument.prop.payload!s}\n\n"
+        return ''
 
+    def _examples_section(self, argument) -> str:
+        """Return examples section if provided."""
         examples: list[str] = argument.prop.examples
         if examples and len(examples) > 0:
-
-
-
+            return f"<EXAMPLES/>\n{examples!s}\n\n"
+        return ''
+
+    def _instruction_section(self, argument, image_files: list[str]) -> str:
+        """Return instruction section, removing vision patterns when needed."""
+        prompt = argument.prop.prompt
+        if prompt is None or len(prompt) == 0:
+            return ''
+        value = str(prompt)
+        if len(image_files) > 0:
+            value = self._remove_vision_pattern(value)
+        return f"<INSTRUCTION/>\n{value}\n\n"
+
+    def _build_developer_prompt(self, argument, image_files: list[str]) -> str:
+        """Assemble developer prompt content."""
+        developer = self._non_verbose_section(argument)
+        developer = f'{developer}\n' if developer else ''
+
+        parts = [
+            self._response_format_section(argument),
+            *self._context_sections(argument),
+            self._additional_context_section(argument),
+            self._examples_section(argument),
+            self._instruction_section(argument, image_files),
+        ]
+        developer += ''.join(part for part in parts if part)
 
-        if argument.prop.
-
-
-
-
+        if argument.prop.template_suffix:
+            developer += (
+                f' You will only generate content for the placeholder `{argument.prop.template_suffix!s}` '
+                'following the instructions and the provided context information.\n\n'
+            )
+        return developer
 
+    def _build_user_suffix(self, argument, image_files: list[str]) -> str:
+        """Prepare user content suffix."""
         suffix: str = str(argument.prop.processed_input)
         if len(image_files) > 0:
             suffix = self._remove_vision_pattern(suffix)
+        return suffix
 
-
+    def _construct_user_prompt(self, user_text: str, image_files: list[str]):
+        """Construct user prompt payload."""
+        if self.model in {
+            'o1',
+            'o3',
+            'o3-mini',
+            'o4-mini',
+            'gpt-5',
+            'gpt-5-mini',
+            'gpt-5-nano',
+        }:
+            images = [{'type': 'image_url', 'image_url': {'url': file}} for file in image_files]
+            user_prompt = {
+                "role": "user",
+                "content": [
+                    *images,
+                    {'type': 'text', 'text': user_text},
+                ],
+            }
+            return user_prompt, images
+        return {"role": "user", "content": user_text}, None
+
+    def _apply_self_prompt(
+        self,
+        argument,
+        user_prompt,
+        developer: str,
+        user_text: str,
+        images,
+        image_files: list[str],
+    ):
+        """Apply self-prompting when requested."""
+        instance = argument.prop.instance
+        if not (instance._kwargs.get('self_prompt', False) or argument.prop.self_prompt):
+            return user_prompt, developer
+
+        self_prompter = SelfPrompt()
+        res = self_prompter({'user': user_text, 'developer': developer})
+        if res is None:
+            UserMessage("Self-prompting failed!", raise_with=ValueError)
 
-        if
-
-
-
-
-
-
-
+        if len(image_files) > 0:
+            image_content = images if images is not None else [
+                {'type': 'image_url', 'image_url': {'url': file}} for file in image_files
+            ]
+            user_prompt = {
+                "role": "user",
+                "content": [
+                    *image_content,
+                    {'type': 'text', 'text': res['user']},
+                ],
+            }
         else:
-            user_prompt = {
+            user_prompt = {"role": "user", "content": res['user']}
 
-
-        if argument.prop.instance._kwargs.get('self_prompt', False) or argument.prop.self_prompt:
-            self_prompter = SelfPrompt()
-            res = self_prompter({'user': user, 'developer': developer})
-            if res is None:
-                CustomUserWarning("Self-prompting failed!", raise_with=ValueError)
+        return user_prompt, res['developer']
 
-
-
-
-
-
-
-            user_prompt = { "role": "user", "content": res['user'] }
+    def prepare(self, argument):
+        if argument.prop.raw_input:
+            argument.prop.prepared_input = self._prepare_raw_input(argument)
+            return
+
+        image_files = self._handle_image_content(str(argument.prop.processed_input))
 
-
+        developer = self._build_developer_prompt(argument, image_files)
+        user_text = self._build_user_suffix(argument, image_files)
+        user_prompt, images = self._construct_user_prompt(user_text, image_files)
+        user_prompt, developer = self._apply_self_prompt(
+            argument,
+            user_prompt,
+            developer,
+            user_text,
+            images,
+            image_files,
+        )
 
         argument.prop.prepared_input = [
             { "role": "developer", "content": developer },
```
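The rewritten `prepare` assembles the developer prompt from optional sections and keeps only the non-empty ones via `''.join(part for part in parts if part)`. A minimal sketch of that composition pattern, with hypothetical section contents for illustration only:

```python
def build_developer_prompt(sections: list[str]) -> str:
    """Join only the non-empty sections, matching the
    ''.join(part for part in parts if part) pattern used in the diff."""
    return ''.join(part for part in sections if part)


# Hypothetical section contents; the engine derives these from argument.prop.
parts = [
    '',                                                       # no <RESPONSE_FORMAT/> requested
    '<STATIC CONTEXT/>\nYou are a geometry tutor.\n\n',
    '',                                                       # no <ADDITIONAL CONTEXT/>
    '<EXAMPLES/>\n["area of a 2x3 rectangle -> 6"]\n\n',
    '<INSTRUCTION/>\nCompute the area of a 5x4 rectangle.\n\n',
]
developer = build_developer_prompt(parts)
# developer now contains only the three populated sections, in order.
```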
```diff
@@ -387,24 +504,28 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
 
     def _process_function_calls(self, res, metadata):
         hit = False
-        if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if (
+            hasattr(res, 'choices')
+            and res.choices
+            and hasattr(res.choices[0], 'message')
+            and res.choices[0].message
+            and hasattr(res.choices[0].message, 'tool_calls')
+            and res.choices[0].message.tool_calls
+        ):
+            for tool_call in res.choices[0].message.tool_calls:
+                if hit:
+                    UserMessage("Multiple function calls detected in the response but only the first one will be processed.")
+                    break
+                if hasattr(tool_call, 'function') and tool_call.function:
+                    try:
+                        args_dict = json.loads(tool_call.function.arguments)
+                    except json.JSONDecodeError:
+                        args_dict = {}
+                    metadata['function_call'] = {
+                        'name': tool_call.function.name,
+                        'arguments': args_dict
+                    }
+                    hit = True
         return metadata
 
     def _prepare_request_payload(self, messages, argument):
```
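`_process_function_calls` now guards every attribute access before reading tool calls and falls back to an empty dict when the call arguments are not valid JSON. A self-contained sketch of the same extraction against a mocked response object (the real objects come from the openai SDK):

```python
import json
from types import SimpleNamespace


def first_function_call(res) -> dict | None:
    """Extract name/arguments of the first tool call, tolerating malformed JSON."""
    choices = getattr(res, 'choices', None)
    if not choices:
        return None
    message = getattr(choices[0], 'message', None)
    tool_calls = getattr(message, 'tool_calls', None) if message else None
    if not tool_calls:
        return None
    fn = tool_calls[0].function
    try:
        args = json.loads(fn.arguments)
    except json.JSONDecodeError:
        args = {}
    return {'name': fn.name, 'arguments': args}


# Mocked response shaped like an OpenAI chat completion carrying one tool call.
res = SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(
    tool_calls=[SimpleNamespace(function=SimpleNamespace(name='get_weather', arguments='{"city": "Vienna"}'))]))])
assert first_function_call(res) == {'name': 'get_weather', 'arguments': {'city': 'Vienna'}}
```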
```diff
@@ -416,13 +537,13 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         remaining_tokens = self.compute_remaining_tokens(messages)
 
         if max_tokens is not None:
-
+            UserMessage(
                 "'max_tokens' is now deprecated in favor of 'max_completion_tokens', and is not compatible with o1 series models. "
                 "We handle this conversion by default for you for now but we won't in the future. "
                 "See: https://platform.openai.com/docs/api-reference/chat/create"
             )
             if max_tokens > self.max_response_tokens:
-
+                UserMessage(
                     f"Provided 'max_tokens' ({max_tokens}) exceeds max response tokens ({self.max_response_tokens}). "
                     f"Truncating to {remaining_tokens} to avoid API failure."
                 )
@@ -431,13 +552,12 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
             kwargs['max_completion_tokens'] = max_tokens
             del kwargs['max_tokens']
 
-        if max_completion_tokens is not None:
-
-
-
-
-
-            kwargs['max_completion_tokens'] = remaining_tokens
+        if max_completion_tokens is not None and max_completion_tokens > self.max_response_tokens:
+            UserMessage(
+                f"Provided 'max_completion_tokens' ({max_completion_tokens}) exceeds max response tokens ({self.max_response_tokens}). "
+                f"Truncating to {remaining_tokens} to avoid API failure."
+            )
+            kwargs['max_completion_tokens'] = remaining_tokens
 
         payload = {
             "messages": messages,
```
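`_prepare_request_payload` converts the deprecated `max_tokens` into `max_completion_tokens` and replaces limits above the model's response maximum with the computed remaining budget. The lines between the two hunks are not shown, so the clamp on `max_tokens` below is an assumption; a rough sketch of the conversion over a plain kwargs dict:

```python
def normalize_completion_limit(kwargs: dict, max_response_tokens: int, remaining_tokens: int) -> dict:
    """Sketch of the conversion shown in the diff: move the deprecated
    'max_tokens' to 'max_completion_tokens' and fall back to the computed
    remaining budget whenever a requested limit exceeds the model maximum."""
    max_tokens = kwargs.pop('max_tokens', None)
    if max_tokens is not None:
        if max_tokens > max_response_tokens:
            max_tokens = remaining_tokens  # assumed clamping step between the two hunks
        kwargs['max_completion_tokens'] = max_tokens
    mct = kwargs.get('max_completion_tokens')
    if mct is not None and mct > max_response_tokens:
        kwargs['max_completion_tokens'] = remaining_tokens
    return kwargs


assert normalize_completion_limit({'max_tokens': 1000}, 800, 700) == {'max_completion_tokens': 700}
assert normalize_completion_limit({'max_completion_tokens': 300}, 800, 700) == {'max_completion_tokens': 300}
```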
symai/backend/engines/ocr/engine_apilayer.py

```diff
@@ -1,11 +1,10 @@
-import requests
 from pathlib import Path
 
-
+import requests
 
+from ....symbol import Result
 from ...base import Engine
 from ...settings import SYMAI_CONFIG
-from ....symbol import Result
 
 
 class ApiLayerResult(Result):
@@ -14,13 +13,13 @@ class ApiLayerResult(Result):
         self.raw = text
         try:
             dict_ = self._to_symbol(text).ast()
-            self._value = dict_
-        except:
+            self._value = dict_.get('all_text', f'OCR Engine Error: {text} - status code {status_code}')
+        except Exception:
             self._value = f'OCR Engine Error: {text} - status code {status_code}'
 
 
 class OCREngine(Engine):
-    def __init__(self, api_key:
+    def __init__(self, api_key: str | None = None):
         super().__init__()
         # Opening JSON file
         self.config = SYMAI_CONFIG
@@ -42,12 +41,11 @@ class OCREngine(Engine):
         }
 
     def forward(self, argument):
-        kwargs = argument.kwargs
         image_url = argument.prop.image
 
         if image_url.startswith("file://"):
             file_path = Path(image_url[7:]).resolve()
-            with open(
+            with file_path.open("rb") as file:
                 payload = file.read()
             url = "https://api.apilayer.com/image_to_text/upload"
             response = requests.request("POST", url, headers=self.headers, data=payload)
```
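`ApiLayerResult` now pulls the `all_text` field out of the parsed OCR response and falls back to an error string when parsing fails or the field is missing. A sketch of that fallback using plain `json.loads` in place of the engine's `_to_symbol(...).ast()` parsing:

```python
import json


def ocr_value(text: str, status_code: int) -> str:
    """Mirror the fallback: return the 'all_text' field from the JSON body,
    or an error string when parsing fails or the key is absent."""
    error = f'OCR Engine Error: {text} - status code {status_code}'
    try:
        return json.loads(text).get('all_text', error)
    except Exception:
        return error


assert ocr_value('{"all_text": "Hello"}', 200) == 'Hello'
assert ocr_value('not json', 500) == 'OCR Engine Error: not json - status code 500'
```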
symai/backend/engines/output/engine_stdout.py

```diff
@@ -15,10 +15,7 @@ class OutputEngine(Engine):
         args = [] if args is None else args
         kwargs = {} if kwargs is None else kwargs
         if expr:
-            if processed
-                res = expr(processed, *args, **kwargs)
-            else:
-                res = expr(*args, **kwargs)
+            res = expr(processed, *args, **kwargs) if processed else expr(*args, **kwargs)
 
         metadata = {}
         result = {
```
|