symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +96 -64
- symai/backend/base.py +93 -80
- symai/backend/engines/drawing/engine_bfl.py +12 -11
- symai/backend/engines/drawing/engine_gpt_image.py +108 -87
- symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
- symai/backend/engines/embedding/engine_openai.py +3 -5
- symai/backend/engines/execute/engine_python.py +6 -5
- symai/backend/engines/files/engine_io.py +74 -67
- symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
- symai/backend/engines/index/engine_pinecone.py +23 -24
- symai/backend/engines/index/engine_vectordb.py +16 -14
- symai/backend/engines/lean/engine_lean4.py +38 -34
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
- symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
- symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
- symai/backend/engines/ocr/engine_apilayer.py +6 -8
- symai/backend/engines/output/engine_stdout.py +1 -4
- symai/backend/engines/search/engine_openai.py +7 -7
- symai/backend/engines/search/engine_perplexity.py +5 -5
- symai/backend/engines/search/engine_serpapi.py +12 -14
- symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
- symai/backend/engines/text_to_speech/engine_openai.py +5 -7
- symai/backend/engines/text_vision/engine_clip.py +7 -11
- symai/backend/engines/userinput/engine_console.py +3 -3
- symai/backend/engines/webscraping/engine_requests.py +81 -48
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +4 -2
- symai/backend/mixin/deepseek.py +2 -0
- symai/backend/mixin/google.py +2 -0
- symai/backend/mixin/openai.py +11 -3
- symai/backend/settings.py +83 -16
- symai/chat.py +101 -78
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +77 -69
- symai/collect/pipeline.py +35 -27
- symai/collect/stats.py +75 -63
- symai/components.py +198 -169
- symai/constraints.py +15 -12
- symai/core.py +698 -359
- symai/core_ext.py +32 -34
- symai/endpoints/api.py +80 -73
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +11 -8
- symai/extended/arxiv_pdf_parser.py +13 -12
- symai/extended/bibtex_parser.py +2 -3
- symai/extended/conversation.py +101 -90
- symai/extended/document.py +17 -10
- symai/extended/file_merger.py +18 -13
- symai/extended/graph.py +18 -13
- symai/extended/html_style_template.py +2 -4
- symai/extended/interfaces/blip_2.py +1 -2
- symai/extended/interfaces/clip.py +1 -2
- symai/extended/interfaces/console.py +7 -1
- symai/extended/interfaces/dall_e.py +1 -1
- symai/extended/interfaces/flux.py +1 -1
- symai/extended/interfaces/gpt_image.py +1 -1
- symai/extended/interfaces/input.py +1 -1
- symai/extended/interfaces/llava.py +0 -1
- symai/extended/interfaces/naive_vectordb.py +7 -8
- symai/extended/interfaces/naive_webscraping.py +1 -1
- symai/extended/interfaces/ocr.py +1 -1
- symai/extended/interfaces/pinecone.py +6 -5
- symai/extended/interfaces/serpapi.py +1 -1
- symai/extended/interfaces/terminal.py +2 -3
- symai/extended/interfaces/tts.py +1 -1
- symai/extended/interfaces/whisper.py +1 -1
- symai/extended/interfaces/wolframalpha.py +1 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +11 -13
- symai/extended/os_command.py +17 -16
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +19 -16
- symai/extended/packages/sympkg.py +12 -9
- symai/extended/packages/symrun.py +21 -19
- symai/extended/repo_cloner.py +11 -10
- symai/extended/seo_query_optimizer.py +1 -2
- symai/extended/solver.py +20 -23
- symai/extended/summarizer.py +4 -3
- symai/extended/taypan_interpreter.py +10 -12
- symai/extended/vectordb.py +99 -82
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +12 -16
- symai/formatter/regex.py +62 -63
- symai/functional.py +176 -122
- symai/imports.py +136 -127
- symai/interfaces.py +56 -27
- symai/memory.py +14 -13
- symai/misc/console.py +49 -39
- symai/misc/loader.py +5 -3
- symai/models/__init__.py +17 -1
- symai/models/base.py +269 -181
- symai/models/errors.py +0 -1
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +11 -15
- symai/ops/primitives.py +348 -228
- symai/post_processors.py +32 -28
- symai/pre_processors.py +39 -41
- symai/processor.py +6 -4
- symai/prompts.py +59 -45
- symai/server/huggingface_server.py +23 -20
- symai/server/llama_cpp_server.py +7 -5
- symai/shell.py +3 -4
- symai/shellsv.py +499 -375
- symai/strategy.py +517 -287
- symai/symbol.py +111 -116
- symai/utils.py +42 -36
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
- symbolicai-1.0.0.dist-info/RECORD +163 -0
- symbolicai-0.20.2.dist-info/RECORD +0 -162
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py

@@ -2,15 +2,14 @@ import json
 import logging
 import re
 from copy import deepcopy
-from typing import
+from typing import ClassVar

 import openai
 import tiktoken

 from ....components import SelfPrompt
-from ....misc.console import ConsoleStyle
 from ....symbol import Symbol
-from ....utils import
+from ....utils import UserMessage, encode_media_frames
 from ...base import Engine
 from ...mixin.openai import OpenAIMixin
 from ...settings import SYMAI_CONFIG
@@ -23,7 +22,39 @@ logging.getLogger("httpcore").setLevel(logging.ERROR)


 class GPTXChatEngine(Engine, OpenAIMixin):
-
+    _THREE_TOKEN_MODELS: ClassVar[set[str]] = {
+        "gpt-3.5-turbo-0613",
+        "gpt-3.5-turbo-16k-0613",
+        "gpt-4-1106-preview",
+        "gpt-4-0314",
+        "gpt-4-32k-0314",
+        "gpt-4-0613",
+        "gpt-4-32k-0613",
+        "gpt-4-turbo",
+        "gpt-4o",
+        "gpt-4o-2024-11-20",
+        "gpt-4o-mini",
+        "chatgpt-4o-latest",
+        "gpt-4.1",
+        "gpt-4.1-mini",
+        "gpt-4.1-nano",
+        "gpt-5-chat-latest",
+    }
+    _VISION_PREVIEW_MODEL = "gpt-4-vision-preview"
+    _VISION_IMAGE_URL_MODELS: ClassVar[set[str]] = {
+        "gpt-4-turbo-2024-04-09",
+        "gpt-4-turbo",
+        "gpt-4o",
+        "gpt-4o-mini",
+        "chatgpt-4o-latest",
+        "gpt-4.1",
+        "gpt-4.1-mini",
+        "gpt-4.1-nano",
+        "gpt-5-chat-latest",
+    }
+    _NON_VERBOSE_OUTPUT = """<META_INSTRUCTION/>\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n\n"""
+
+    def __init__(self, api_key: str | None = None, model: str | None = None):
         super().__init__()
         self.config = deepcopy(SYMAI_CONFIG)
         # In case we use EngineRepository.register to inject the api_key and model => dynamically change the engine at runtime
@@ -36,7 +67,7 @@ class GPTXChatEngine(Engine, OpenAIMixin):
             self.model = self.config['NEUROSYMBOLIC_ENGINE_MODEL']
         try:
             self.tokenizer = tiktoken.encoding_for_model(self.model)
-        except Exception
+        except Exception:
             self.tokenizer = tiktoken.get_encoding('o200k_base')
         self.max_context_tokens = self.api_max_context_tokens()
         self.max_response_tokens = self.api_max_response_tokens()
@@ -46,7 +77,7 @@ class GPTXChatEngine(Engine, OpenAIMixin):
         try:
             self.client = openai.Client(api_key=openai.api_key)
         except Exception as e:
-
+            UserMessage(f'Failed to initialize OpenAI client. Please check your OpenAI library version. Caused by: {e}', raise_with=ValueError)

     def id(self) -> str:
         if self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
@@ -67,58 +98,43 @@ class GPTXChatEngine(Engine, OpenAIMixin):
         if 'seed' in kwargs:
             self.seed = kwargs['seed']

-    def
-
-
-        if self.model
-
-
-            "gpt-
-            "gpt-4-0314",
-            "gpt-4-32k-0314",
-            "gpt-4-0613",
-            "gpt-4-32k-0613",
-            "gpt-4-turbo",
-            "gpt-4o",
-            "gpt-4o-2024-11-20",
-            "gpt-4o-mini",
-            "chatgpt-4o-latest",
-            "gpt-4.1",
-            "gpt-4.1-mini",
-            "gpt-4.1-nano",
-            "gpt-5-chat-latest"
-        }:
-            tokens_per_message = 3
-            tokens_per_name = 1
-        elif self.model == "gpt-3.5-turbo-0301":
-            tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n
-            tokens_per_name = -1 # if there's a name, the role is omitted
-        elif self.model == "gpt-3.5-turbo":
-            CustomUserWarning("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
-            tokens_per_message = 3
-            tokens_per_name = 1
+    def _resolve_token_config(self) -> tuple[int, int]:
+        if self.model in self._THREE_TOKEN_MODELS:
+            return 3, 1
+        if self.model == "gpt-3.5-turbo-0301":
+            return 4, -1
+        if self.model == "gpt-3.5-turbo":
+            UserMessage("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
             self.tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo-0613")
-
-
-
-            CustomUserWarning("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
+            return 3, 1
+        if self.model == "gpt-4":
+            UserMessage("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
             self.tokenizer = tiktoken.encoding_for_model("gpt-4-0613")
-
-
-
-
-
+            return 3, 1
+        UserMessage(
+            f"""num_tokens_from_messages() is not implemented for model {self.model}. See https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken for information on how messages are converted to tokens.""",
+            raise_with=NotImplementedError
+        )
+        raise NotImplementedError
+
+    def _count_tokens_in_value(self, value) -> int:
+        if isinstance(value, str):
+            return len(self.tokenizer.encode(value, disallowed_special=()))
+        tokens = 0
+        for item in value:
+            if item['type'] == 'text':
+                tokens += len(self.tokenizer.encode(item['text'], disallowed_special=()))
+        return tokens
+
+    def compute_required_tokens(self, messages):
+        """Return the number of tokens used by a list of messages."""

+        tokens_per_message, tokens_per_name = self._resolve_token_config()
         num_tokens = 0
         for message in messages:
             num_tokens += tokens_per_message
             for key, value in message.items():
-
-                    num_tokens += len(self.tokenizer.encode(value, disallowed_special=()))
-                else:
-                    for v in value:
-                        if v['type'] == 'text':
-                            num_tokens += len(self.tokenizer.encode(v['text'], disallowed_special=()))
+                num_tokens += self._count_tokens_in_value(value)
                 if key == "name":
                     num_tokens += tokens_per_name
         num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
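The hunk above collapses the per-model if/elif chain into `_resolve_token_config()` and `_count_tokens_in_value()`. For readers unfamiliar with the counting scheme, here is a minimal standalone sketch of the same per-message accounting (illustrative only, not part of the package; the function name and defaults are hypothetical, and `tiktoken` is assumed to be installed):

```python
# Illustrative sketch of the (tokens_per_message, tokens_per_name) accounting used above.
import tiktoken

def count_chat_tokens(messages, model="gpt-4o", tokens_per_message=3, tokens_per_name=1):
    try:
        enc = tiktoken.encoding_for_model(model)
    except KeyError:
        enc = tiktoken.get_encoding("o200k_base")  # same fallback the engine uses
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            if isinstance(value, str):
                num_tokens += len(enc.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    return num_tokens + 3  # every reply is primed with <|start|>assistant<|message|>

print(count_chat_tokens([{"role": "user", "content": "Hello!"}]))
```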
@@ -128,6 +144,58 @@ class GPTXChatEngine(Engine, OpenAIMixin):
         val = self.compute_required_tokens(prompts)
         return min(self.max_context_tokens - val, self.max_response_tokens)

+    def _should_skip_truncation(self, prompts: list[dict]) -> bool:
+        if len(prompts) != 2 and all(prompt['role'] in ['system', 'user'] for prompt in prompts):
+            UserMessage(f"Token truncation currently supports only two messages, from 'user' and 'system' (got {len(prompts)}). Returning original prompts.")
+            return True
+        return False
+
+    def _resolve_truncation_percentage(self, truncation_percentage: float | None) -> float:
+        if truncation_percentage is not None:
+            return truncation_percentage
+        return (self.max_context_tokens - self.max_response_tokens) / self.max_context_tokens
+
+    def _collect_user_tokens(self, user_prompt: dict, prompts: list[dict]) -> tuple[list, object | None]:
+        user_tokens: list = []
+        content = user_prompt['content']
+        if isinstance(content, str):
+            user_tokens.extend(Symbol(content).tokens)
+            return user_tokens, None
+        if isinstance(content, list):
+            for content_item in content:
+                if isinstance(content_item, dict):
+                    if content_item.get('type') == 'text':
+                        user_tokens.extend(Symbol(content_item['text']).tokens)
+                    else:
+                        return [], prompts
+                else:
+                    return [], ValueError(f"Invalid content type: {type(content_item)}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python")
+            return user_tokens, None
+        UserMessage(f"Unknown content type: {type(content)}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python", raise_with=ValueError)
+        return user_tokens, None
+
+    def _user_only_exceeds(self, user_token_count: int, system_token_count: int, max_prompt_tokens: int) -> bool:
+        return user_token_count > max_prompt_tokens/2 and system_token_count <= max_prompt_tokens/2
+
+    def _system_only_exceeds(self, system_token_count: int, user_token_count: int, max_prompt_tokens: int) -> bool:
+        return system_token_count > max_prompt_tokens/2 and user_token_count <= max_prompt_tokens/2
+
+    def _compute_proportional_lengths(self, system_token_count: int, user_token_count: int, total_tokens: int, max_prompt_tokens: int) -> tuple[int, int]:
+        system_ratio = system_token_count / total_tokens
+        user_ratio = user_token_count / total_tokens
+        new_system_len = int(max_prompt_tokens * system_ratio)
+        new_user_len = int(max_prompt_tokens * user_ratio)
+        distribute_tokens = max_prompt_tokens - new_system_len - new_user_len
+        new_system_len += distribute_tokens // 2
+        new_user_len += distribute_tokens // 2
+        return new_system_len, new_user_len
+
+    def _decode_prompt_pair(self, system_tokens, user_tokens) -> list[dict]:
+        return [
+            {'role': 'system', 'content': self.tokenizer.decode(system_tokens)},
+            {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(user_tokens)}]}
+        ]
+
     def _handle_image_content(self, content: str) -> list:
         """Handle image content by processing vision patterns and returning image file data."""
         def extract_pattern(text):
@@ -151,9 +219,7 @@ class GPTXChatEngine(Engine, OpenAIMixin):
         parts = extract_pattern(content)
         for p in parts:
             img_ = p.strip()
-            if img_.startswith('http'):
-                image_files.append(img_)
-            elif img_.startswith('data:image'):
+            if img_.startswith('http') or img_.startswith('data:image'):
                 image_files.append(img_)
             else:
                 max_frames_spacing = 50
@@ -163,7 +229,7 @@ class GPTXChatEngine(Engine, OpenAIMixin):
                     max_used_frames, img_ = img_.split(':')
                     max_used_frames = int(max_used_frames)
                     if max_used_frames < 1 or max_used_frames > max_frames_spacing:
-
+                        UserMessage(f"Invalid max_used_frames value: {max_used_frames}. Expected value between 1 and {max_frames_spacing}", raise_with=ValueError)
                 buffer, ext = encode_media_frames(img_)
                 if len(buffer) > 1:
                     step = len(buffer) // max_frames_spacing # max frames spacing
@@ -175,7 +241,7 @@ class GPTXChatEngine(Engine, OpenAIMixin):
                 elif len(buffer) == 1:
                     image_files.append(f"data:image/{ext};base64,{buffer[0]}")
                 else:
-
+                    UserMessage('No frames found or error in encoding frames')
         return image_files

     def _remove_vision_pattern(self, text: str) -> str:
@@ -190,41 +256,16 @@ class GPTXChatEngine(Engine, OpenAIMixin):
             new_len = max(100, new_len) # Ensure minimum token length
             return tokens[-new_len:] if truncation_type == 'head' else tokens[:new_len] # else 'tail'

-        if
-            # Only support system and user prompts
-            CustomUserWarning(f"Token truncation currently supports only two messages, from 'user' and 'system' (got {len(prompts)}). Returning original prompts.")
+        if self._should_skip_truncation(prompts):
            return prompts

-
-        # Calculate smart truncation percentage based on model's max messages and completion tokens
-        truncation_percentage = (self.max_context_tokens - self.max_response_tokens) / self.max_context_tokens
-
+        truncation_percentage = self._resolve_truncation_percentage(truncation_percentage)
         system_prompt = prompts[0]
         user_prompt = prompts[1]
-
-        # Get token counts
         system_tokens = Symbol(system_prompt['content']).tokens
-        user_tokens =
-
-
-            # Default input format
-            user_tokens.extend(Symbol(user_prompt['content']).tokens)
-        elif isinstance(user_prompt['content'], list):
-            for content_item in user_prompt['content']:
-                # Image input format
-                if isinstance(content_item, dict):
-                    if content_item.get('type') == 'text':
-                        user_tokens.extend(Symbol(content_item['text']).tokens)
-                    else:
-                        # Image content; return original since not supported
-                        return prompts
-                else:
-                    # Unknown input format
-                    return ValueError(f"Invalid content type: {type(content_item)}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python")
-        else:
-            # Unknown input format
-            CustomUserWarning(f"Unknown content type: {type(user_prompt['content'])}. Format input according to the documentation. See https://platform.openai.com/docs/api-reference/chat/create?lang=python", raise_with=ValueError)
-
+        user_tokens, fallback = self._collect_user_tokens(user_prompt, prompts)
+        if fallback is not None:
+            return fallback
         system_token_count = len(system_tokens)
         user_token_count = len(user_tokens)
         artifacts = self.compute_required_tokens(prompts) - (system_token_count + user_token_count)
@@ -238,7 +279,7 @@ class GPTXChatEngine(Engine, OpenAIMixin):
         if total_tokens <= max_prompt_tokens:
             return prompts

-
+        UserMessage(
             f"Executing {truncation_type} truncation to fit within {max_prompt_tokens} tokens. "
             f"Combined prompts ({total_tokens} tokens) exceed maximum allowed tokens "
             f"of {max_prompt_tokens} ({truncation_percentage*100:.1f}% of context). "
@@ -248,40 +289,23 @@ class GPTXChatEngine(Engine, OpenAIMixin):
             f"Choose 'truncation_type' as 'head' to keep the end of prompts or 'tail' to keep the beginning."
         )
         # Case 1: Only user prompt exceeds
-        if user_token_count
+        if self._user_only_exceeds(user_token_count, system_token_count, max_prompt_tokens):
             new_user_len = max_prompt_tokens - system_token_count
             new_user_tokens = _slice_tokens(user_tokens, new_user_len, truncation_type)
-            return
-                {'role': 'system', 'content': self.tokenizer.decode(system_tokens)},
-                {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(new_user_tokens)}]}
-            ]
+            return self._decode_prompt_pair(system_tokens, new_user_tokens)

         # Case 2: Only system prompt exceeds
-        if system_token_count
+        if self._system_only_exceeds(system_token_count, user_token_count, max_prompt_tokens):
             new_system_len = max_prompt_tokens - user_token_count
             new_system_tokens = _slice_tokens(system_tokens, new_system_len, truncation_type)
-            return
-                {'role': 'system', 'content': self.tokenizer.decode(new_system_tokens)},
-                {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(user_tokens)}]}
-            ]
+            return self._decode_prompt_pair(new_system_tokens, user_tokens)

         # Case 3: Both exceed - reduce proportionally
-
-        user_ratio = user_token_count / total_tokens
-
-        new_system_len = int(max_prompt_tokens * system_ratio)
-        new_user_len = int(max_prompt_tokens * user_ratio)
-        distribute_tokens = max_prompt_tokens - new_system_len - new_user_len
-        new_system_len += distribute_tokens // 2
-        new_user_len += distribute_tokens // 2
-
+        new_system_len, new_user_len = self._compute_proportional_lengths(system_token_count, user_token_count, total_tokens, max_prompt_tokens)
         new_system_tokens = _slice_tokens(system_tokens, new_system_len, truncation_type)
         new_user_tokens = _slice_tokens(user_tokens, new_user_len, truncation_type)

-        return
-            {'role': 'system', 'content': self.tokenizer.decode(new_system_tokens)},
-            {'role': 'user', 'content': [{'type': 'text', 'text': self.tokenizer.decode(new_user_tokens)}]}
-        ]
+        return self._decode_prompt_pair(new_system_tokens, new_user_tokens)

     def forward(self, argument):
         kwargs = argument.kwargs
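The three truncation cases above reduce to simple budget arithmetic. A self-contained sketch of the proportional split applied in case 3 (the helper name and numbers are illustrative, not the package's API):

```python
# Both prompts are scaled to their share of the token budget and the
# integer remainder from rounding is split evenly between them.
def proportional_lengths(system_tokens: int, user_tokens: int, budget: int) -> tuple[int, int]:
    total = system_tokens + user_tokens
    new_system = int(budget * system_tokens / total)
    new_user = int(budget * user_tokens / total)
    leftover = budget - new_system - new_user
    return new_system + leftover // 2, new_user + leftover // 2

# e.g. a 6000/4000-token prompt pair squeezed into a 5000-token budget
print(proportional_lengths(6000, 4000, 5000))  # (3000, 2000)
```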
@@ -297,18 +321,18 @@ class GPTXChatEngine(Engine, OpenAIMixin):
         except Exception as e:
             if openai.api_key is None or openai.api_key == '':
                 msg = 'OpenAI API key is not set. Please set it in the config file or pass it as an argument to the command method.'
-
+                UserMessage(msg)
                 if self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] is None or self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] == '':
-
+                    UserMessage(msg, raise_with=ValueError)
                 openai.api_key = self.config['NEUROSYMBOLIC_ENGINE_API_KEY']

             callback = self.client.chat.completions.create
-            kwargs['model'] = kwargs
+            kwargs['model'] = kwargs.get('model', self.model)

             if except_remedy is not None:
                 res = except_remedy(self, e, callback, argument)
             else:
-
+                UserMessage(f'Error during generation. Caused by: {e}', raise_with=ValueError)

         metadata = {'raw_output': res}
         if payload.get('tools'):
@@ -322,111 +346,111 @@ class GPTXChatEngine(Engine, OpenAIMixin):

     def _prepare_raw_input(self, argument):
         if not argument.prop.processed_input:
-
+            UserMessage('Need to provide a prompt instruction to the engine if raw_input is enabled.', raise_with=ValueError)
         value = argument.prop.processed_input
         # convert to dict if not already
-        if
-            if
+        if not isinstance(value, list):
+            if not isinstance(value, dict):
                 value = {'role': 'user', 'content': str(value)}
             value = [value]
         return value

-    def
-        if argument.prop.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # OpenAI docs:
-            # "Important: when using JSON mode, you must also instruct the model
-            # to produce JSON yourself via a system or user message"
-            system += f'<RESPONSE_FORMAT/>\nYou are a helpful assistant designed to output JSON.\n\n'
-
+    def _build_non_verbose_prefix(self, argument) -> list[str]:
+        if not argument.prop.suppress_verbose_output:
+            return []
+        prefix = f'{self._NON_VERBOSE_OUTPUT}\n'
+        return [prefix]
+
+    def _response_format_section(self, argument) -> list[str]:
+        if not argument.prop.response_format:
+            return []
+        _rsp_fmt = argument.prop.response_format
+        assert _rsp_fmt.get('type') is not None, 'Expected format `{ "type": "json_object" }`! See https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format'
+        if _rsp_fmt["type"] != "json_object":
+            return []
+        return ['<RESPONSE_FORMAT/>\nYou are a helpful assistant designed to output JSON.\n\n']
+
+    def _context_sections(self, argument) -> list[str]:
+        sections: list[str] = []
         ref = argument.prop.instance
         static_ctxt, dyn_ctxt = ref.global_context
         if len(static_ctxt) > 0:
-
-
+            sections.append(f"<STATIC CONTEXT/>\n{static_ctxt}\n\n")
         if len(dyn_ctxt) > 0:
-
+            sections.append(f"<DYNAMIC CONTEXT/>\n{dyn_ctxt}\n\n")
+        return sections

+    def _payload_section(self, argument) -> list[str]:
+        if not argument.prop.payload:
+            return []
         payload = argument.prop.payload
-
-
-
-        examples:
-        if examples and len(examples) > 0:
-
-
-
-
-        if argument.prop.prompt is
-
-
-
-
-
+        return [f"<ADDITIONAL CONTEXT/>\n{payload!s}\n\n"]
+
+    def _examples_section(self, argument) -> list[str]:
+        examples: list[str] = argument.prop.examples
+        if not (examples and len(examples) > 0):
+            return []
+        return [f"<EXAMPLES/>\n{examples!s}\n\n"]
+
+    def _instruction_section(self, argument, image_files: list[str]) -> list[str]:
+        if argument.prop.prompt is None or len(argument.prop.prompt) == 0:
+            return []
+        val = str(argument.prop.prompt)
+        if len(image_files) > 0:
+            val = self._remove_vision_pattern(val)
+        return [f"<INSTRUCTION/>\n{val}\n\n"]
+
+    def _template_suffix_section(self, argument) -> list[str]:
+        if not argument.prop.template_suffix:
+            return []
+        return [f' You will only generate content for the placeholder `{argument.prop.template_suffix!s}` following the instructions and the provided context information.\n\n']
+
+    def _build_system_message(self, argument, image_files: list[str]) -> str:
+        sections: list[str] = []
+        sections.extend(self._build_non_verbose_prefix(argument))
+        sections.extend(self._response_format_section(argument))
+        sections.extend(self._context_sections(argument))
+        sections.extend(self._payload_section(argument))
+        sections.extend(self._examples_section(argument))
+        sections.extend(self._instruction_section(argument, image_files))
+        sections.extend(self._template_suffix_section(argument))
+        return "".join(sections)
+
+    def _build_user_text(self, argument, image_files: list[str]) -> str:
         suffix: str = str(argument.prop.processed_input)
         if len(image_files) > 0:
             suffix = self._remove_vision_pattern(suffix)
+        return f"{suffix}"
+
+    def _create_user_prompt(self, user_text: str, image_files: list[str]) -> dict:
+        if self.model == self._VISION_PREVIEW_MODEL:
+            images = [{'type': 'image', "image_url": {"url": file}} for file in image_files]
+            return {"role": "user", "content": [*images, {'type': 'text', 'text': user_text}]}
+        if self.model in self._VISION_IMAGE_URL_MODELS:
+            images = [{'type': 'image_url', "image_url": {"url": file}} for file in image_files]
+            return {"role": "user", "content": [*images, {'type': 'text', 'text': user_text}]}
+        return {"role": "user", "content": user_text}
+
+    def _apply_self_prompt_if_needed(self, argument, system: str, user_prompt: dict, user_text: str, image_files: list[str]) -> tuple[str, dict]:
+        if not (argument.prop.instance._kwargs.get('self_prompt', False) or argument.prop.self_prompt):
+            return system, user_prompt
+        self_prompter = SelfPrompt()
+        res = self_prompter({'user': user_text, 'system': system})
+        if res is None:
+            UserMessage("Self-prompting failed!", raise_with=ValueError)
+        new_user_prompt = self._create_user_prompt(res['user'], image_files)
+        return res['system'], new_user_prompt

-
-
-
-
-
-        if self.model == 'gpt-4-vision-preview':
-            images = [{ 'type': 'image', "image_url": { "url": file }} for file in image_files]
-            user_prompt = { "role": "user", "content": [
-                *images,
-                { 'type': 'text', 'text': user }
-            ]}
-        elif self.model == 'gpt-4-turbo-2024-04-09' or \
-             self.model == 'gpt-4-turbo' or \
-             self.model == 'gpt-4o' or \
-             self.model == 'gpt-4o-mini' or \
-             self.model == 'chatgpt-4o-latest' or \
-             self.model == 'gpt-4.1' or \
-             self.model == 'gpt-4.1-mini' or \
-             self.model == 'gpt-4.1-nano' or \
-             self.model == 'gpt-5-chat-latest':
-
-            images = [{ 'type': 'image_url', "image_url": { "url": file }} for file in image_files]
-            user_prompt = { "role": "user", "content": [
-                *images,
-                { 'type': 'text', 'text': user }
-            ]}
-        else:
-            user_prompt = { "role": "user", "content": user }
-
-        # First check if the `Symbol` instance has the flag set, otherwise check if it was passed as an argument to a method
-        if argument.prop.instance._kwargs.get('self_prompt', False) or argument.prop.self_prompt:
-            self_prompter = SelfPrompt()
-
-            res = self_prompter({'user': user, 'system': system})
-            if res is None:
-                CustomUserWarning("Self-prompting failed!", raise_with=ValueError)
-
-            if len(image_files) > 0:
-                user_prompt = { "role": "user", "content": [
-                    *images,
-                    { 'type': 'text', 'text': res['user'] }
-                ]}
-            else:
-                user_prompt = { "role": "user", "content": res['user'] }
+    def prepare(self, argument):
+        if argument.prop.raw_input:
+            argument.prop.prepared_input = self._prepare_raw_input(argument)
+            return

-
+        image_files = self._handle_image_content(str(argument.prop.processed_input))
+        system = self._build_system_message(argument, image_files)
+        user_text = self._build_user_text(argument, image_files)
+        user_prompt = self._create_user_prompt(user_text, image_files)
+        system, user_prompt = self._apply_self_prompt_if_needed(argument, system, user_prompt, user_text, image_files)

         argument.prop.prepared_input = [
             { "role": "system", "content": system },
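After this refactor, `prepare()` composes the system message from small `_*_section` helpers and delegates the user message to `_create_user_prompt`. For image_url-capable models the resulting user message follows the standard OpenAI multi-part content shape; a sketch of roughly what the prepared input looks like (URL and text are placeholders):

```python
# Sketch of the prepared messages for an image_url-capable model;
# the section markers mirror the helpers above, values are placeholders.
messages = [
    {"role": "system", "content": "<STATIC CONTEXT/>\n...\n\n<INSTRUCTION/>\n...\n\n"},
    {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
            {"type": "text", "text": "Describe the attached image."},
        ],
    },
]
```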
@@ -435,24 +459,28 @@ class GPTXChatEngine(Engine, OpenAIMixin):

     def _process_function_calls(self, res, metadata):
         hit = False
-        if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if (
+            hasattr(res, 'choices')
+            and res.choices
+            and hasattr(res.choices[0], 'message')
+            and res.choices[0].message
+            and hasattr(res.choices[0].message, 'tool_calls')
+            and res.choices[0].message.tool_calls
+        ):
+            for tool_call in res.choices[0].message.tool_calls:
+                if hasattr(tool_call, 'function') and tool_call.function:
+                    if hit:
+                        UserMessage("Multiple function calls detected in the response but only the first one will be processed.")
+                        break
+                    try:
+                        args_dict = json.loads(tool_call.function.arguments)
+                    except json.JSONDecodeError:
+                        args_dict = {}
+                    metadata['function_call'] = {
+                        'name': tool_call.function.name,
+                        'arguments': args_dict
+                    }
+                    hit = True
         return metadata

     def _prepare_request_payload(self, messages, argument):
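The rewritten `_process_function_calls` walks `choices[0].message.tool_calls` defensively and records only the first call. A minimal sketch of the same extraction against an OpenAI-SDK-style response object (illustrative helper, not part of the package; error handling trimmed):

```python
import json

def first_tool_call(res):
    """Return {'name': ..., 'arguments': ...} for the first tool call, or None."""
    choices = getattr(res, "choices", None) or []
    message = getattr(choices[0], "message", None) if choices else None
    for tool_call in getattr(message, "tool_calls", None) or []:
        fn = getattr(tool_call, "function", None)
        if fn:
            try:
                args = json.loads(fn.arguments)
            except json.JSONDecodeError:
                args = {}
            return {"name": fn.name, "arguments": args}
    return None
```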
@@ -464,13 +492,13 @@ class GPTXChatEngine(Engine, OpenAIMixin):
         remaining_tokens = self.compute_remaining_tokens(messages)

         if max_tokens is not None:
-
+            UserMessage(
                 "'max_tokens' is now deprecated in favor of 'max_completion_tokens', and is not compatible with o1 series models. "
                 "We handle this conversion by default for you for now but we won't in the future. "
                 "See: https://platform.openai.com/docs/api-reference/chat/create"
             )
             if max_tokens > self.max_response_tokens:
-
+                UserMessage(
                     f"Provided 'max_tokens' ({max_tokens}) exceeds max response tokens ({self.max_response_tokens}). "
                     f"Truncating to {remaining_tokens} to avoid API failure."
                 )
@@ -479,13 +507,12 @@ class GPTXChatEngine(Engine, OpenAIMixin):
             kwargs['max_completion_tokens'] = max_tokens
             del kwargs['max_tokens']

-        if max_completion_tokens is not None:
-
-
-
-
-
-            kwargs['max_completion_tokens'] = remaining_tokens
+        if max_completion_tokens is not None and max_completion_tokens > self.max_response_tokens:
+            UserMessage(
+                f"Provided 'max_completion_tokens' ({max_completion_tokens}) exceeds max response tokens ({self.max_response_tokens}). "
+                f"Truncating to {remaining_tokens} to avoid API failure."
+            )
+            kwargs['max_completion_tokens'] = remaining_tokens

         payload = {
             "messages": messages,