symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. symai/__init__.py +96 -64
  2. symai/backend/base.py +93 -80
  3. symai/backend/engines/drawing/engine_bfl.py +12 -11
  4. symai/backend/engines/drawing/engine_gpt_image.py +108 -87
  5. symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
  6. symai/backend/engines/embedding/engine_openai.py +3 -5
  7. symai/backend/engines/execute/engine_python.py +6 -5
  8. symai/backend/engines/files/engine_io.py +74 -67
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
  11. symai/backend/engines/index/engine_pinecone.py +23 -24
  12. symai/backend/engines/index/engine_vectordb.py +16 -14
  13. symai/backend/engines/lean/engine_lean4.py +38 -34
  14. symai/backend/engines/neurosymbolic/__init__.py +41 -13
  15. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
  17. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
  18. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
  19. symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
  20. symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
  21. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
  22. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
  23. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
  24. symai/backend/engines/ocr/engine_apilayer.py +6 -8
  25. symai/backend/engines/output/engine_stdout.py +1 -4
  26. symai/backend/engines/search/engine_openai.py +7 -7
  27. symai/backend/engines/search/engine_perplexity.py +5 -5
  28. symai/backend/engines/search/engine_serpapi.py +12 -14
  29. symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
  30. symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
  31. symai/backend/engines/text_to_speech/engine_openai.py +5 -7
  32. symai/backend/engines/text_vision/engine_clip.py +7 -11
  33. symai/backend/engines/userinput/engine_console.py +3 -3
  34. symai/backend/engines/webscraping/engine_requests.py +81 -48
  35. symai/backend/mixin/__init__.py +13 -0
  36. symai/backend/mixin/anthropic.py +4 -2
  37. symai/backend/mixin/deepseek.py +2 -0
  38. symai/backend/mixin/google.py +2 -0
  39. symai/backend/mixin/openai.py +11 -3
  40. symai/backend/settings.py +83 -16
  41. symai/chat.py +101 -78
  42. symai/collect/__init__.py +7 -1
  43. symai/collect/dynamic.py +77 -69
  44. symai/collect/pipeline.py +35 -27
  45. symai/collect/stats.py +75 -63
  46. symai/components.py +198 -169
  47. symai/constraints.py +15 -12
  48. symai/core.py +698 -359
  49. symai/core_ext.py +32 -34
  50. symai/endpoints/api.py +80 -73
  51. symai/extended/.DS_Store +0 -0
  52. symai/extended/__init__.py +46 -12
  53. symai/extended/api_builder.py +11 -8
  54. symai/extended/arxiv_pdf_parser.py +13 -12
  55. symai/extended/bibtex_parser.py +2 -3
  56. symai/extended/conversation.py +101 -90
  57. symai/extended/document.py +17 -10
  58. symai/extended/file_merger.py +18 -13
  59. symai/extended/graph.py +18 -13
  60. symai/extended/html_style_template.py +2 -4
  61. symai/extended/interfaces/blip_2.py +1 -2
  62. symai/extended/interfaces/clip.py +1 -2
  63. symai/extended/interfaces/console.py +7 -1
  64. symai/extended/interfaces/dall_e.py +1 -1
  65. symai/extended/interfaces/flux.py +1 -1
  66. symai/extended/interfaces/gpt_image.py +1 -1
  67. symai/extended/interfaces/input.py +1 -1
  68. symai/extended/interfaces/llava.py +0 -1
  69. symai/extended/interfaces/naive_vectordb.py +7 -8
  70. symai/extended/interfaces/naive_webscraping.py +1 -1
  71. symai/extended/interfaces/ocr.py +1 -1
  72. symai/extended/interfaces/pinecone.py +6 -5
  73. symai/extended/interfaces/serpapi.py +1 -1
  74. symai/extended/interfaces/terminal.py +2 -3
  75. symai/extended/interfaces/tts.py +1 -1
  76. symai/extended/interfaces/whisper.py +1 -1
  77. symai/extended/interfaces/wolframalpha.py +1 -1
  78. symai/extended/metrics/__init__.py +11 -1
  79. symai/extended/metrics/similarity.py +11 -13
  80. symai/extended/os_command.py +17 -16
  81. symai/extended/packages/__init__.py +29 -3
  82. symai/extended/packages/symdev.py +19 -16
  83. symai/extended/packages/sympkg.py +12 -9
  84. symai/extended/packages/symrun.py +21 -19
  85. symai/extended/repo_cloner.py +11 -10
  86. symai/extended/seo_query_optimizer.py +1 -2
  87. symai/extended/solver.py +20 -23
  88. symai/extended/summarizer.py +4 -3
  89. symai/extended/taypan_interpreter.py +10 -12
  90. symai/extended/vectordb.py +99 -82
  91. symai/formatter/__init__.py +9 -1
  92. symai/formatter/formatter.py +12 -16
  93. symai/formatter/regex.py +62 -63
  94. symai/functional.py +176 -122
  95. symai/imports.py +136 -127
  96. symai/interfaces.py +56 -27
  97. symai/memory.py +14 -13
  98. symai/misc/console.py +49 -39
  99. symai/misc/loader.py +5 -3
  100. symai/models/__init__.py +17 -1
  101. symai/models/base.py +269 -181
  102. symai/models/errors.py +0 -1
  103. symai/ops/__init__.py +32 -22
  104. symai/ops/measures.py +11 -15
  105. symai/ops/primitives.py +348 -228
  106. symai/post_processors.py +32 -28
  107. symai/pre_processors.py +39 -41
  108. symai/processor.py +6 -4
  109. symai/prompts.py +59 -45
  110. symai/server/huggingface_server.py +23 -20
  111. symai/server/llama_cpp_server.py +7 -5
  112. symai/shell.py +3 -4
  113. symai/shellsv.py +499 -375
  114. symai/strategy.py +517 -287
  115. symai/symbol.py +111 -116
  116. symai/utils.py +42 -36
  117. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
  118. symbolicai-1.0.0.dist-info/RECORD +163 -0
  119. symbolicai-0.20.2.dist-info/RECORD +0 -162
  120. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
  121. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
  122. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
  123. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
@@ -7,9 +7,8 @@ import openai
 import tiktoken
 
 from ....components import SelfPrompt
-from ....misc.console import ConsoleStyle
 from ....symbol import Symbol
-from ....utils import CustomUserWarning, encode_media_frames
+from ....utils import UserMessage, encode_media_frames
 from ...base import Engine
 from ...mixin.openai import OpenAIMixin
 from ...settings import SYMAI_CONFIG
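This release renames the `CustomUserWarning` helper to `UserMessage` across the engines. The diff only shows call sites such as `UserMessage(msg)` and `UserMessage(msg, raise_with=ValueError)`, so the sketch below is an assumption about the helper's shape, not the actual `symai/utils.py` implementation:

```python
import warnings

def UserMessage(message: str, raise_with: type[Exception] | None = None) -> None:
    """Hypothetical sketch: emit a user-facing message, optionally escalating to an exception.

    Mirrors the call pattern seen in the diff (UserMessage(msg) warns,
    UserMessage(msg, raise_with=ValueError) raises); the real helper may differ.
    """
    if raise_with is not None:
        raise raise_with(message)
    warnings.warn(message, UserWarning, stacklevel=2)
```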
@@ -36,7 +35,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         self.name = self.__class__.__name__
         try:
             self.tokenizer = tiktoken.encoding_for_model(self.model)
-        except Exception as e:
+        except Exception:
             self.tokenizer = tiktoken.get_encoding('o200k_base')
         self.max_context_tokens = self.api_max_context_tokens()
         self.max_response_tokens = self.api_max_response_tokens()
@@ -45,7 +44,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         try:
             self.client = openai.Client(api_key=openai.api_key)
         except Exception as e:
-            CustomUserWarning(f'Failed to initialize OpenAI client. Please check your OpenAI library version. Caused by: {e}', raise_with=ValueError)
+            UserMessage(f'Failed to initialize OpenAI client. Please check your OpenAI library version. Caused by: {e}', raise_with=ValueError)
 
     def id(self) -> str:
         if self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
@@ -82,7 +81,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
             tokens_per_message = 3
             tokens_per_name = 1
         else:
-            CustomUserWarning(
+            UserMessage(
                 f"'num_tokens_from_messages()' is not implemented for model {self.model}. "
                 "See https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken for information on how messages are converted to tokens.",
                 raise_with=NotImplementedError
@@ -92,7 +91,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         for message in messages:
            num_tokens += tokens_per_message
            for key, value in message.items():
-                if type(value) == str:
+                if isinstance(value, str):
                    num_tokens += len(self.tokenizer.encode(value, disallowed_special=()))
                else:
                    for v in value:
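The `isinstance` fix above sits inside the engine's message token counter, which follows the tiktoken cookbook recipe linked a few lines earlier. A standalone sketch of that counting scheme (3 tokens of overhead per message, plus the encoded content) could look roughly like this; the constants and tokenizer name come from the diff, while the wrapper function and its name are illustrative:

```python
import tiktoken

def count_message_tokens(messages: list[dict], encoding_name: str = "o200k_base") -> int:
    """Illustrative sketch of a cookbook-style counter, assumed to approximate the engine's."""
    tokenizer = tiktoken.get_encoding(encoding_name)
    tokens_per_message = 3  # per-message overhead, as in the diff
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for value in message.values():
            if isinstance(value, str):
                num_tokens += len(tokenizer.encode(value, disallowed_special=()))
    return num_tokens

print(count_message_tokens([{"role": "user", "content": "Hello!"}]))
```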
@@ -122,9 +121,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         parts = _extract_pattern(content)
         for p in parts:
             img_ = p.strip()
-            if img_.startswith('http'):
-                image_files.append(img_)
-            elif img_.startswith('data:image'):
+            if img_.startswith('http') or img_.startswith('data:image'):
                 image_files.append(img_)
             else:
                 max_frames_spacing = 50
@@ -134,7 +131,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
                     max_used_frames, img_ = img_.split(':')
                     max_used_frames = int(max_used_frames)
                     if max_used_frames < 1 or max_used_frames > max_frames_spacing:
-                        CustomUserWarning(f"Invalid max_used_frames value: {max_used_frames}. Expected value between 1 and {max_frames_spacing}", raise_with=ValueError)
+                        UserMessage(f"Invalid max_used_frames value: {max_used_frames}. Expected value between 1 and {max_frames_spacing}", raise_with=ValueError)
                 buffer, ext = encode_media_frames(img_)
                 if len(buffer) > 1:
                     step = len(buffer) // max_frames_spacing # max frames spacing
@@ -146,7 +143,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
                 elif len(buffer) == 1:
                     image_files.append(f"data:image/{ext};base64,{buffer[0]}")
                 else:
-                    print('No frames found or error in encoding frames')
+                    UserMessage('No frames found or error in encoding frames')
         return image_files
 
     def _remove_vision_pattern(self, text: str) -> str:
@@ -154,16 +151,79 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         pattern = r'<<vision:(.*?):>>'
         return re.sub(pattern, '', text)
 
-    def truncate(self, prompts: list[dict], truncation_percentage: float | None, truncation_type: str) -> list[dict]:
-        """Main truncation method"""
-        def _slice_tokens(tokens, new_len, truncation_type):
-            """Slice tokens based on truncation type"""
-            new_len = max(100, new_len) # Ensure minimum token length
-            return tokens[-new_len:] if truncation_type == 'head' else tokens[:new_len] # else 'tail'
+    def _slice_tokens(self, tokens, new_len, truncation_type):
+        """Slice tokens based on truncation type."""
+        new_len = max(100, new_len) # Ensure minimum token length
+        return tokens[-new_len:] if truncation_type == 'head' else tokens[:new_len] # else 'tail'
 
+    def _validate_truncation_prompts(self, prompts: list[dict]) -> bool:
+        """Validate prompt structure before truncation."""
         if len(prompts) != 2 and all(prompt['role'] in ['developer', 'user'] for prompt in prompts):
             # Only support developer and user prompts
-            CustomUserWarning(f"Token truncation currently supports only two messages, from 'user' and 'developer' (got {len(prompts)}). Returning original prompts.")
+            UserMessage(
+                f"Token truncation currently supports only two messages, from 'user' and 'developer' (got {len(prompts)}). Returning original prompts."
+            )
+            return False
+        return True
+
+    def _collect_user_tokens(
+        self,
+        user_prompt: dict,
+    ) -> tuple[list[int], bool]:
+        """Collect user tokens and detect unsupported content."""
+        user_tokens: list[int] = []
+        user_content = user_prompt['content']
+        if isinstance(user_content, str):
+            user_tokens.extend(Symbol(user_content).tokens)
+            return user_tokens, False
+        if isinstance(user_content, list):
+            for content_item in user_content:
+                if isinstance(content_item, dict):
+                    if content_item.get('type') == 'text':
+                        user_tokens.extend(Symbol(content_item['text']).tokens)
+                    else:
+                        return user_tokens, True
+                else:
+                    UserMessage(
+                        f"Invalid content type: {type(content_item)}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python",
+                        raise_with=ValueError,
+                    )
+            return user_tokens, False
+        return UserMessage(
+            f"Unknown content type: {type(user_prompt['content'])}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python",
+            raise_with=ValueError,
+        )
+
+    def _truncate_single_prompt_exceed(
+        self,
+        system_tokens,
+        user_tokens,
+        system_token_count,
+        user_token_count,
+        max_prompt_tokens,
+        truncation_type,
+    ):
+        """Handle truncation when only one prompt exceeds the limit."""
+        half_limit = max_prompt_tokens / 2
+        if user_token_count > half_limit and system_token_count <= half_limit:
+            new_user_len = max_prompt_tokens - system_token_count
+            new_user_tokens = self._slice_tokens(user_tokens, new_user_len, truncation_type)
+            return [
+                {'role': 'developer', 'content': self.tokenizer.decode(system_tokens)},
+                {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(new_user_tokens)}]},
+            ]
+        if system_token_count > half_limit and user_token_count <= half_limit:
+            new_system_len = max_prompt_tokens - user_token_count
+            new_system_tokens = self._slice_tokens(system_tokens, new_system_len, truncation_type)
+            return [
+                {'role': 'developer', 'content': self.tokenizer.decode(new_system_tokens)},
+                {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(user_tokens)}]},
+            ]
+        return None
+
+    def truncate(self, prompts: list[dict], truncation_percentage: float | None, truncation_type: str) -> list[dict]:
+        """Main truncation method"""
+        if not self._validate_truncation_prompts(prompts):
             return prompts
 
         if truncation_percentage is None:
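The refactor above turns the nested `_slice_tokens` closure into a method and splits validation and token collection into helpers, but the slicing semantics are unchanged: 'head' truncation keeps the end of the token list, 'tail' keeps the beginning, and at least 100 tokens are always retained. A minimal standalone illustration, with made-up token IDs:

```python
def slice_tokens(tokens: list[int], new_len: int, truncation_type: str) -> list[int]:
    # Same rule as the engine's _slice_tokens: enforce a 100-token floor,
    # then keep the end for 'head' truncation or the start for 'tail'.
    new_len = max(100, new_len)
    return tokens[-new_len:] if truncation_type == 'head' else tokens[:new_len]

tokens = list(range(500))                                    # pretend token IDs
assert slice_tokens(tokens, 120, 'head') == tokens[-120:]    # drops the head of the prompt
assert slice_tokens(tokens, 120, 'tail') == tokens[:120]     # drops the tail of the prompt
assert len(slice_tokens(tokens, 10, 'tail')) == 100          # floor of 100 tokens
```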
@@ -177,23 +237,9 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         system_tokens = Symbol(system_prompt['content']).tokens
         user_tokens = []
 
-        if isinstance(user_prompt['content'], str):
-            # Default input format
-            user_tokens.extend(Symbol(user_prompt['content']).tokens)
-        elif isinstance(user_prompt['content'], list):
-            for content_item in user_prompt['content']:
-                # Image input format
-                if isinstance(content_item, dict):
-                    if content_item.get('type') == 'text':
-                        user_tokens.extend(Symbol(content_item['text']).tokens)
-                    else:
-                        # Image content; return original since not supported
-                        return prompts
-                else:
-                    CustomUserWarning(f"Invalid content type: {type(content_item)}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python", raise_with=ValueError)
-        else:
-            # Unknown input format
-            CustomUserWarning(f"Unknown content type: {type(user_prompt['content'])}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python", raise_with=ValueError)
+        user_tokens, should_return_original = self._collect_user_tokens(user_prompt)
+        if should_return_original:
+            return prompts
 
         system_token_count = len(system_tokens)
         user_token_count = len(user_tokens)
@@ -208,7 +254,7 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         if total_tokens <= max_prompt_tokens:
             return prompts
 
-        CustomUserWarning(
+        UserMessage(
             f"Executing {truncation_type} truncation to fit within {max_prompt_tokens} tokens. "
             f"Combined prompts ({total_tokens} tokens) exceed maximum allowed tokens "
             f"of {max_prompt_tokens} ({truncation_percentage*100:.1f}% of context). "
@@ -218,23 +264,16 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
             f"Choose 'truncation_type' as 'head' to keep the end of prompts or 'tail' to keep the beginning."
         )
 
-        # Case 1: Only user prompt exceeds
-        if user_token_count > max_prompt_tokens/2 and system_token_count <= max_prompt_tokens/2:
-            new_user_len = max_prompt_tokens - system_token_count
-            new_user_tokens = _slice_tokens(user_tokens, new_user_len, truncation_type)
-            return [
-                {'role': 'developer', 'content': self.tokenizer.decode(system_tokens)},
-                {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(new_user_tokens)}]}
-            ]
-
-        # Case 2: Only developer prompt exceeds
-        if system_token_count > max_prompt_tokens/2 and user_token_count <= max_prompt_tokens/2:
-            new_system_len = max_prompt_tokens - user_token_count
-            new_system_tokens = _slice_tokens(system_tokens, new_system_len, truncation_type)
-            return [
-                {'role': 'developer', 'content': self.tokenizer.decode(new_system_tokens)},
-                {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(user_tokens)}]}
-            ]
+        single_prompt_adjustment = self._truncate_single_prompt_exceed(
+            system_tokens,
+            user_tokens,
+            system_token_count,
+            user_token_count,
+            max_prompt_tokens,
+            truncation_type,
+        )
+        if single_prompt_adjustment is not None:
+            return single_prompt_adjustment
 
         # Case 3: Both exceed - reduce proportionally
         system_ratio = system_token_count / total_tokens
@@ -246,8 +285,8 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
             new_system_len += distribute_tokens // 2
             new_user_len += distribute_tokens // 2
 
-        new_system_tokens = _slice_tokens(system_tokens, new_system_len, truncation_type)
-        new_user_tokens = _slice_tokens(user_tokens, new_user_len, truncation_type)
+        new_system_tokens = self._slice_tokens(system_tokens, new_system_len, truncation_type)
+        new_user_tokens = self._slice_tokens(user_tokens, new_user_len, truncation_type)
 
         return [
             {'role': 'developer', 'content': self.tokenizer.decode(new_system_tokens)},
@@ -268,18 +307,18 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         except Exception as e:
             if openai.api_key is None or openai.api_key == '':
                 msg = 'OpenAI API key is not set. Please set it in the config file or pass it as an argument to the command method.'
-                logging.error(msg)
+                UserMessage(msg)
                 if self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] is None or self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] == '':
-                    CustomUserWarning(msg, raise_with=ValueError)
+                    UserMessage(msg, raise_with=ValueError)
                 openai.api_key = self.config['NEUROSYMBOLIC_ENGINE_API_KEY']
 
             callback = self.client.chat.completions.create
-            kwargs['model'] = kwargs['model'] if 'model' in kwargs else self.model
+            kwargs['model'] = kwargs.get('model', self.model)
 
             if except_remedy is not None:
                 res = except_remedy(self, e, callback, argument)
             else:
-                CustomUserWarning(f'Error during generation. Caused by: {e}', raise_with=ValueError)
+                UserMessage(f'Error during generation. Caused by: {e}', raise_with=ValueError)
 
         metadata = {'raw_output': res}
         if payload.get('tools'):
@@ -290,95 +329,173 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
 
     def _prepare_raw_input(self, argument):
         if not argument.prop.processed_input:
-            CustomUserWarning('Need to provide a prompt instruction to the engine if raw_input is enabled.', raise_with=ValueError)
+            UserMessage('Need to provide a prompt instruction to the engine if raw_input is enabled.', raise_with=ValueError)
         value = argument.prop.processed_input
         # convert to dict if not already
-        if type(value) != list:
-            if type(value) != dict:
+        if not isinstance(value, list):
+            if not isinstance(value, dict):
                 value = {'role': 'user', 'content': str(value)}
             value = [value]
         return value
 
-    def prepare(self, argument):
-        if argument.prop.raw_input:
-            argument.prop.prepared_input = self._prepare_raw_input(argument)
-            return
-
-        _non_verbose_output = """<META_INSTRUCTION/>\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n\n"""
-        user: str = ""
-        developer: str = ""
-
+    def _non_verbose_section(self, argument) -> str:
+        """Return non-verbose instruction section if needed."""
         if argument.prop.suppress_verbose_output:
-            developer += _non_verbose_output
-        developer = f'{developer}\n' if developer and len(developer) > 0 else ''
-
-        if argument.prop.response_format:
-            _rsp_fmt = argument.prop.response_format
-            assert _rsp_fmt.get('type') is not None, 'Expected format `{ "type": "json_object" }`! See https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format'
-            if _rsp_fmt["type"] == "json_object":
-                # OpenAI docs:
-                # "Important: when using JSON mode, you must also instruct the model
-                # to produce JSON yourself via a developer or user message"
-                developer += f'<RESPONSE_FORMAT/>\nYou are a helpful assistant designed to output JSON.\n\n'
-
-        ref = argument.prop.instance
-        static_ctxt, dyn_ctxt = ref.global_context
+            return (
+                "<META_INSTRUCTION/>\n"
+                "You do not output anything else, like verbose preambles or post explanation, such as "
+                "\"Sure, let me...\", \"Hope that was helpful...\", \"Yes, I can help you with that...\", etc. "
+                "Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use "
+                "indentation, etc. Never add meta instructions information to your output!\n\n"
+            )
+        return ''
+
+    def _response_format_section(self, argument) -> str:
+        """Return response format instructions if provided."""
+        if not argument.prop.response_format:
+            return ''
+        response_format = argument.prop.response_format
+        assert response_format.get('type') is not None, 'Expected format `{ "type": "json_object" }`! See https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format'
+        if response_format["type"] == "json_object":
+            return '<RESPONSE_FORMAT/>\nYou are a helpful assistant designed to output JSON.\n\n'
+        return ''
+
+    def _context_sections(self, argument) -> list[str]:
+        """Return static and dynamic context sections."""
+        sections: list[str] = []
+        static_ctxt, dyn_ctxt = argument.prop.instance.global_context
         if len(static_ctxt) > 0:
-            developer += f"<STATIC CONTEXT/>\n{static_ctxt}\n\n"
-
+            sections.append(f"<STATIC CONTEXT/>\n{static_ctxt}\n\n")
         if len(dyn_ctxt) > 0:
-            developer += f"<DYNAMIC CONTEXT/>\n{dyn_ctxt}\n\n"
+            sections.append(f"<DYNAMIC CONTEXT/>\n{dyn_ctxt}\n\n")
+        return sections
 
-        payload = argument.prop.payload
+    def _additional_context_section(self, argument) -> str:
+        """Return additional payload context if any."""
         if argument.prop.payload:
-            developer += f"<ADDITIONAL CONTEXT/>\n{str(payload)}\n\n"
+            return f"<ADDITIONAL CONTEXT/>\n{argument.prop.payload!s}\n\n"
+        return ''
 
+    def _examples_section(self, argument) -> str:
+        """Return examples section if provided."""
         examples: list[str] = argument.prop.examples
         if examples and len(examples) > 0:
-            developer += f"<EXAMPLES/>\n{str(examples)}\n\n"
-
-        image_files = self._handle_image_content(str(argument.prop.processed_input))
+            return f"<EXAMPLES/>\n{examples!s}\n\n"
+        return ''
+
+    def _instruction_section(self, argument, image_files: list[str]) -> str:
+        """Return instruction section, removing vision patterns when needed."""
+        prompt = argument.prop.prompt
+        if prompt is None or len(prompt) == 0:
+            return ''
+        value = str(prompt)
+        if len(image_files) > 0:
+            value = self._remove_vision_pattern(value)
+        return f"<INSTRUCTION/>\n{value}\n\n"
+
+    def _build_developer_prompt(self, argument, image_files: list[str]) -> str:
+        """Assemble developer prompt content."""
+        developer = self._non_verbose_section(argument)
+        developer = f'{developer}\n' if developer else ''
+
+        parts = [
+            self._response_format_section(argument),
+            *self._context_sections(argument),
+            self._additional_context_section(argument),
+            self._examples_section(argument),
+            self._instruction_section(argument, image_files),
+        ]
+        developer += ''.join(part for part in parts if part)
 
-        if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
-            val = str(argument.prop.prompt)
-            if len(image_files) > 0:
-                val = self._remove_vision_pattern(val)
-            developer += f"<INSTRUCTION/>\n{val}\n\n"
+        if argument.prop.template_suffix:
+            developer += (
+                f' You will only generate content for the placeholder `{argument.prop.template_suffix!s}` '
+                'following the instructions and the provided context information.\n\n'
+            )
+        return developer
 
+    def _build_user_suffix(self, argument, image_files: list[str]) -> str:
+        """Prepare user content suffix."""
         suffix: str = str(argument.prop.processed_input)
         if len(image_files) > 0:
             suffix = self._remove_vision_pattern(suffix)
+        return suffix
 
-        user += f"{suffix}"
+    def _construct_user_prompt(self, user_text: str, image_files: list[str]):
+        """Construct user prompt payload."""
+        if self.model in {
+            'o1',
+            'o3',
+            'o3-mini',
+            'o4-mini',
+            'gpt-5',
+            'gpt-5-mini',
+            'gpt-5-nano',
+        }:
+            images = [{'type': 'image_url', 'image_url': {'url': file}} for file in image_files]
+            user_prompt = {
+                "role": "user",
+                "content": [
+                    *images,
+                    {'type': 'text', 'text': user_text},
+                ],
+            }
+            return user_prompt, images
+        return {"role": "user", "content": user_text}, None
+
+    def _apply_self_prompt(
+        self,
+        argument,
+        user_prompt,
+        developer: str,
+        user_text: str,
+        images,
+        image_files: list[str],
+    ):
+        """Apply self-prompting when requested."""
+        instance = argument.prop.instance
+        if not (instance._kwargs.get('self_prompt', False) or argument.prop.self_prompt):
+            return user_prompt, developer
+
+        self_prompter = SelfPrompt()
+        res = self_prompter({'user': user_text, 'developer': developer})
+        if res is None:
+            UserMessage("Self-prompting failed!", raise_with=ValueError)
 
-        if argument.prop.template_suffix:
-            developer += f' You will only generate content for the placeholder `{str(argument.prop.template_suffix)}` following the instructions and the provided context information.\n\n'
-
-        if self.model == 'o1':
-            images = [{ 'type': 'image_url', "image_url": { "url": file }} for file in image_files]
-            user_prompt = { "role": "user", "content": [
-                *images,
-                { 'type': 'text', 'text': user }
-            ]}
+        if len(image_files) > 0:
+            image_content = images if images is not None else [
+                {'type': 'image_url', 'image_url': {'url': file}} for file in image_files
+            ]
+            user_prompt = {
+                "role": "user",
+                "content": [
+                    *image_content,
+                    {'type': 'text', 'text': res['user']},
+                ],
+            }
         else:
-            user_prompt = { "role": "user", "content": user }
+            user_prompt = {"role": "user", "content": res['user']}
 
-        # First check if the `Symbol` instance has the flag set, otherwise check if it was passed as an argument to a method
-        if argument.prop.instance._kwargs.get('self_prompt', False) or argument.prop.self_prompt:
-            self_prompter = SelfPrompt()
-            res = self_prompter({'user': user, 'developer': developer})
-            if res is None:
-                CustomUserWarning("Self-prompting failed!", raise_with=ValueError)
+        return user_prompt, res['developer']
 
-            if len(image_files) > 0:
-                user_prompt = { "role": "user", "content": [
-                    *images,
-                    { 'type': 'text', 'text': res['user'] }
-                ]}
-            else:
-                user_prompt = { "role": "user", "content": res['user'] }
+    def prepare(self, argument):
+        if argument.prop.raw_input:
+            argument.prop.prepared_input = self._prepare_raw_input(argument)
+            return
+
+        image_files = self._handle_image_content(str(argument.prop.processed_input))
 
-            developer = res['developer']
+        developer = self._build_developer_prompt(argument, image_files)
+        user_text = self._build_user_suffix(argument, image_files)
+        user_prompt, images = self._construct_user_prompt(user_text, image_files)
+        user_prompt, developer = self._apply_self_prompt(
+            argument,
+            user_prompt,
+            developer,
+            user_text,
+            images,
+            image_files,
+        )
 
         argument.prop.prepared_input = [
             { "role": "developer", "content": developer },
@@ -387,24 +504,28 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
 
     def _process_function_calls(self, res, metadata):
         hit = False
-        if hasattr(res, 'choices') and res.choices:
-            choice = res.choices[0]
-            if hasattr(choice, 'message') and choice.message:
-                if hasattr(choice.message, 'tool_calls') and choice.message.tool_calls:
-                    for tool_call in choice.message.tool_calls:
-                        if hit:
-                            CustomUserWarning("Multiple function calls detected in the response but only the first one will be processed.")
-                            break
-                        if hasattr(tool_call, 'function') and tool_call.function:
-                            try:
-                                args_dict = json.loads(tool_call.function.arguments)
-                            except json.JSONDecodeError:
-                                args_dict = {}
-                            metadata['function_call'] = {
-                                'name': tool_call.function.name,
-                                'arguments': args_dict
-                            }
-                            hit = True
+        if (
+            hasattr(res, 'choices')
+            and res.choices
+            and hasattr(res.choices[0], 'message')
+            and res.choices[0].message
+            and hasattr(res.choices[0].message, 'tool_calls')
+            and res.choices[0].message.tool_calls
+        ):
+            for tool_call in res.choices[0].message.tool_calls:
+                if hit:
+                    UserMessage("Multiple function calls detected in the response but only the first one will be processed.")
+                    break
+                if hasattr(tool_call, 'function') and tool_call.function:
+                    try:
+                        args_dict = json.loads(tool_call.function.arguments)
+                    except json.JSONDecodeError:
+                        args_dict = {}
+                    metadata['function_call'] = {
+                        'name': tool_call.function.name,
+                        'arguments': args_dict
+                    }
+                    hit = True
         return metadata
 
     def _prepare_request_payload(self, messages, argument):
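The flattened guard above still records only the first tool call: the parsed name and JSON arguments land in `metadata['function_call']`. How a caller consumes that entry is not shown in this diff; a purely hypothetical dispatcher might look like this, with the handler table and names invented for illustration:

```python
# Hypothetical consumer of metadata['function_call']; not part of symbolicai.
def dispatch_function_call(metadata: dict, handlers: dict) -> object | None:
    call = metadata.get('function_call')
    if call is None:
        return None
    handler = handlers.get(call['name'])
    if handler is None:
        raise KeyError(f"No handler registered for tool '{call['name']}'")
    return handler(**call['arguments'])

result = dispatch_function_call(
    {'function_call': {'name': 'add', 'arguments': {'a': 2, 'b': 3}}},
    {'add': lambda a, b: a + b},
)
assert result == 5
```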
@@ -416,13 +537,13 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
         remaining_tokens = self.compute_remaining_tokens(messages)
 
         if max_tokens is not None:
-            CustomUserWarning(
+            UserMessage(
                 "'max_tokens' is now deprecated in favor of 'max_completion_tokens', and is not compatible with o1 series models. "
                 "We handle this conversion by default for you for now but we won't in the future. "
                 "See: https://platform.openai.com/docs/api-reference/chat/create"
             )
             if max_tokens > self.max_response_tokens:
-                CustomUserWarning(
+                UserMessage(
                     f"Provided 'max_tokens' ({max_tokens}) exceeds max response tokens ({self.max_response_tokens}). "
                     f"Truncating to {remaining_tokens} to avoid API failure."
                 )
@@ -431,13 +552,12 @@ class GPTXReasoningEngine(Engine, OpenAIMixin):
             kwargs['max_completion_tokens'] = max_tokens
             del kwargs['max_tokens']
 
-        if max_completion_tokens is not None:
-            if max_completion_tokens > self.max_response_tokens:
-                CustomUserWarning(
-                    f"Provided 'max_completion_tokens' ({max_completion_tokens}) exceeds max response tokens ({self.max_response_tokens}). "
-                    f"Truncating to {remaining_tokens} to avoid API failure."
-                )
-                kwargs['max_completion_tokens'] = remaining_tokens
+        if max_completion_tokens is not None and max_completion_tokens > self.max_response_tokens:
+            UserMessage(
+                f"Provided 'max_completion_tokens' ({max_completion_tokens}) exceeds max response tokens ({self.max_response_tokens}). "
+                f"Truncating to {remaining_tokens} to avoid API failure."
+            )
+            kwargs['max_completion_tokens'] = remaining_tokens
 
         payload = {
             "messages": messages,
@@ -1,11 +1,10 @@
-import requests
 from pathlib import Path
 
-from typing import Optional
+import requests
 
+from ....symbol import Result
 from ...base import Engine
 from ...settings import SYMAI_CONFIG
-from ....symbol import Result
 
 
 class ApiLayerResult(Result):
@@ -14,13 +13,13 @@ class ApiLayerResult(Result):
         self.raw = text
         try:
             dict_ = self._to_symbol(text).ast()
-            self._value = dict_['all_text'] if 'all_text' in dict_ else f'OCR Engine Error: {text} - status code {status_code}'
-        except:
+            self._value = dict_.get('all_text', f'OCR Engine Error: {text} - status code {status_code}')
+        except Exception:
             self._value = f'OCR Engine Error: {text} - status code {status_code}'
 
 
 class OCREngine(Engine):
-    def __init__(self, api_key: Optional[str] = None):
+    def __init__(self, api_key: str | None = None):
         super().__init__()
         # Opening JSON file
         self.config = SYMAI_CONFIG
@@ -42,12 +41,11 @@ class OCREngine(Engine):
         }
 
     def forward(self, argument):
-        kwargs = argument.kwargs
         image_url = argument.prop.image
 
         if image_url.startswith("file://"):
             file_path = Path(image_url[7:]).resolve()
-            with open(file_path, "rb") as file:
+            with file_path.open("rb") as file:
                 payload = file.read()
             url = "https://api.apilayer.com/image_to_text/upload"
             response = requests.request("POST", url, headers=self.headers, data=payload)
@@ -15,10 +15,7 @@ class OutputEngine(Engine):
         args = [] if args is None else args
         kwargs = {} if kwargs is None else kwargs
         if expr:
-            if processed:
-                res = expr(processed, *args, **kwargs)
-            else:
-                res = expr(*args, **kwargs)
+            res = expr(processed, *args, **kwargs) if processed else expr(*args, **kwargs)
 
         metadata = {}
         result = {