symbolicai 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +21 -71
- symai/backend/base.py +0 -26
- symai/backend/engines/drawing/engine_gemini_image.py +101 -0
- symai/backend/engines/embedding/engine_openai.py +11 -8
- symai/backend/engines/neurosymbolic/__init__.py +8 -0
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +14 -1
- symai/backend/engines/neurosymbolic/engine_openrouter.py +294 -0
- symai/backend/engines/scrape/engine_requests.py +39 -10
- symai/backend/engines/search/__init__.py +13 -0
- symai/backend/engines/search/engine_firecrawl.py +333 -0
- symai/backend/engines/search/engine_parallel.py +5 -5
- symai/backend/mixin/__init__.py +4 -0
- symai/backend/mixin/openrouter.py +2 -0
- symai/components.py +212 -16
- symai/extended/interfaces/firecrawl.py +30 -0
- symai/extended/interfaces/nanobanana.py +23 -0
- symai/extended/interfaces/parallel.py +5 -5
- symai/functional.py +3 -4
- symai/interfaces.py +2 -0
- symai/ops/primitives.py +0 -18
- symai/shellsv.py +2 -7
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/METADATA +3 -9
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/RECORD +27 -47
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/WHEEL +1 -1
- symai/backend/driver/webclient.py +0 -217
- symai/backend/engines/crawler/engine_selenium.py +0 -94
- symai/backend/engines/drawing/engine_dall_e.py +0 -131
- symai/backend/engines/embedding/engine_plugin_embeddings.py +0 -12
- symai/backend/engines/experiments/engine_bard_wrapper.py +0 -131
- symai/backend/engines/experiments/engine_gptfinetuner.py +0 -32
- symai/backend/engines/experiments/engine_llamacpp_completion.py +0 -142
- symai/backend/engines/neurosymbolic/engine_openai_gptX_completion.py +0 -277
- symai/collect/__init__.py +0 -8
- symai/collect/dynamic.py +0 -117
- symai/collect/pipeline.py +0 -156
- symai/collect/stats.py +0 -434
- symai/extended/crawler.py +0 -21
- symai/extended/interfaces/selenium.py +0 -18
- symai/extended/interfaces/vectordb.py +0 -21
- symai/extended/personas/__init__.py +0 -3
- symai/extended/personas/builder.py +0 -105
- symai/extended/personas/dialogue.py +0 -126
- symai/extended/personas/persona.py +0 -154
- symai/extended/personas/research/__init__.py +0 -1
- symai/extended/personas/research/yann_lecun.py +0 -62
- symai/extended/personas/sales/__init__.py +0 -1
- symai/extended/personas/sales/erik_james.py +0 -62
- symai/extended/personas/student/__init__.py +0 -1
- symai/extended/personas/student/max_tenner.py +0 -51
- symai/extended/strategies/__init__.py +0 -1
- symai/extended/strategies/cot.py +0 -40
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from copy import deepcopy
|
|
4
|
+
|
|
5
|
+
import openai
|
|
6
|
+
|
|
7
|
+
from ....components import SelfPrompt
|
|
8
|
+
from ....core_ext import retry
|
|
9
|
+
from ....utils import UserMessage
|
|
10
|
+
from ...base import Engine
|
|
11
|
+
from ...settings import SYMAI_CONFIG
|
|
12
|
+
|
|
13
|
+
# Silence chatty third-party HTTP/client loggers so engine output stays readable.
logging.getLogger("openai").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)
logging.getLogger("urllib").setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.ERROR)
logging.getLogger("httpcore").setLevel(logging.ERROR)


# Meta-instruction prepended to the system prompt when verbose output is
# suppressed (see OpenRouterEngine._build_system_message): tells the model to
# omit conversational preambles/postambles and produce well-formatted output.
_NON_VERBOSE_OUTPUT = (
    "<META_INSTRUCTION/>\n"
    "You do not output anything else, like verbose preambles or post explanation, such as "
    '"Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. '
    "Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use "
    "indentation, etc. Never add meta instructions information to your output!\n\n"
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class OpenRouterEngine(Engine):
    """Neuro-symbolic engine backed by the OpenRouter chat-completions API.

    Uses the OpenAI Python client pointed at ``https://openrouter.ai/api/v1``.
    Configured model names carry an ``openrouter:`` prefix which is stripped
    before each request (see ``_handle_prefix``).

    NOTE(review): ``UserMessage`` is a project helper — with ``raise_with=``
    it appears to raise that exception type; without it, presumably it only
    reports to the user. Confirm against ``symai.utils``.
    """

    def __init__(self, api_key: str | None = None, model: str | None = None):
        """Initialize the engine from ``SYMAI_CONFIG``, optionally overridden.

        Args:
            api_key: Optional API key injected at runtime; only applied when
                ``model`` is also given.
            model: Optional model identifier (expected form ``openrouter:...``).

        Raises:
            ValueError: If the OpenAI client cannot be constructed.
        """
        super().__init__()
        self.config = deepcopy(SYMAI_CONFIG)
        # In case we use EngineRepository.register to inject the api_key and model => dynamically change the engine at runtime
        if api_key is not None and model is not None:
            self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] = api_key
            self.config["NEUROSYMBOLIC_ENGINE_MODEL"] = model
        if self.id() != "neurosymbolic":
            return  # do not initialize if not neurosymbolic; avoids conflict with llama.cpp check in EngineRepository.register_from_package
        openai.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]
        self.model = self.config["NEUROSYMBOLIC_ENGINE_MODEL"]
        self.seed = None
        self.name = self.__class__.__name__
        # Cache of the last request's prompt-token count, keyed by the exact
        # message list (consumed by compute_required_tokens, filled by forward).
        self._last_prompt_tokens = None
        self._last_messages = None

        try:
            self.client = openai.OpenAI(
                api_key=openai.api_key, base_url="https://openrouter.ai/api/v1"
            )
        except Exception as exc:
            UserMessage(
                f"Failed to initialize OpenRouter client. Please check your OpenAI library version. Caused by: {exc}",
                raise_with=ValueError,
            )

    def id(self) -> str:
        """Identify as 'neurosymbolic' when the configured model targets OpenRouter."""
        model_name = self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
        if model_name and model_name.startswith("openrouter"):
            return "neurosymbolic"
        return super().id()

    def command(self, *args, **kwargs):
        """Apply runtime configuration updates (API key, model, seed)."""
        super().command(*args, **kwargs)
        if "NEUROSYMBOLIC_ENGINE_API_KEY" in kwargs:
            openai.api_key = kwargs["NEUROSYMBOLIC_ENGINE_API_KEY"]
        if "NEUROSYMBOLIC_ENGINE_MODEL" in kwargs:
            self.model = kwargs["NEUROSYMBOLIC_ENGINE_MODEL"]
        if "seed" in kwargs:
            self.seed = kwargs["seed"]

    def compute_required_tokens(self, messages):
        """Return the cached prompt-token count for `messages` if known.

        Only the token count reported by the most recent `forward` call is
        available; any other input raises via UserMessage since no local
        tokenizer is implemented for OpenRouter models.
        """
        if self._last_prompt_tokens is not None and self._last_messages == messages:
            return self._last_prompt_tokens
        UserMessage(
            "Token counting not implemented for this engine.", raise_with=NotImplementedError
        )
        return 0  # unreachable if UserMessage raises; kept as a defensive default

    def compute_remaining_tokens(self, _prompts: list) -> int:
        """Not supported for this engine; always raises via UserMessage."""
        UserMessage(
            "Token counting not implemented for this engine.", raise_with=NotImplementedError
        )

    def _handle_prefix(self, model_name: str) -> str:
        """Handle prefix for model name (strip the leading 'openrouter:')."""
        return model_name.replace("openrouter:", "")

    def _extract_thinking_content(self, output: list[str]) -> tuple[str | None, list[str]]:
        """Extract thinking content from textual output using <think>...</think> tags if present.

        Only the first choice (output[0]) is inspected. Returns a pair of
        (thinking text or None, output with the tag block removed from the
        first element). If the tags are absent or unclosed, the output is
        returned unchanged.
        """
        if not output or not output[0]:
            return None, output

        content = output[0]
        start = content.find("<think>")
        if start == -1:
            return None, output

        # Search for the closing tag after the opener (7 == len("<think>")).
        end = content.find("</think>", start + 7)
        if end == -1:
            return None, output

        thinking_content = content[start + 7 : end].strip() or None
        # 8 == len("</think>"); splice out the whole tag block.
        cleaned_content = (content[:start] + content[end + 8 :]).strip()
        cleaned_output = [cleaned_content, *output[1:]]

        return thinking_content, cleaned_output

    # cumulative wait time is < 30s
    @retry(tries=8, delay=0.5, backoff=1.5, max_delay=5, jitter=(0, 0.5))
    def forward(self, argument):
        """Send the prepared messages to OpenRouter and collect the response.

        Returns:
            A pair ``(output, metadata)`` where output is the list of choice
            contents and metadata holds the raw response plus optional
            ``function_call`` / ``thinking`` entries.

        Raises:
            ValueError: On request failure when no ``except_remedy`` is given.
        """
        kwargs = argument.kwargs
        messages = argument.prop.prepared_input
        payload = self._prepare_request_payload(messages, argument)
        except_remedy = kwargs.get("except_remedy")

        try:
            res = self.client.chat.completions.create(**payload)
        except Exception as exc:
            # If the key was never set, try to recover it from the config
            # before delegating to the remedy (or failing hard).
            if openai.api_key is None or openai.api_key == "":
                msg = (
                    "OpenRouter API key is not set. Please set it in the config file or "
                    "pass it as an argument to the command method."
                )
                UserMessage(msg)
                if (
                    self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] is None
                    or self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] == ""
                ):
                    UserMessage(msg, raise_with=ValueError)
                openai.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]

            callback = self.client.chat.completions.create
            kwargs["model"] = (
                self._handle_prefix(kwargs["model"])
                if "model" in kwargs
                else self._handle_prefix(self.model)
            )

            if except_remedy is not None:
                res = except_remedy(self, exc, callback, argument)
            else:
                UserMessage(f"Error during generation. Caused by: {exc}", raise_with=ValueError)

        # Providers differ in usage field naming; fall back to input_tokens.
        prompt_tokens = getattr(res.usage, "prompt_tokens", None)
        if prompt_tokens is None:
            prompt_tokens = getattr(res.usage, "input_tokens", None)
        self._last_prompt_tokens = prompt_tokens
        self._last_messages = messages

        metadata = {"raw_output": res}
        if payload.get("tools"):
            metadata = self._process_function_calls(res, metadata)

        output = [r.message.content for r in res.choices]
        thinking, output = self._extract_thinking_content(output)
        if thinking:
            metadata["thinking"] = thinking

        return output, metadata

    def _prepare_raw_input(self, argument):
        """Normalize raw input into a list of chat message dicts."""
        if not argument.prop.processed_input:
            UserMessage(
                "Need to provide a prompt instruction to the engine if raw_input is enabled.",
                raise_with=ValueError,
            )
        value = argument.prop.processed_input
        # Coerce a bare string/object into a single user message, then wrap in a list.
        if not isinstance(value, list):
            if not isinstance(value, dict):
                value = {"role": "user", "content": str(value)}
            value = [value]
        return value

    def prepare(self, argument):
        """Build `argument.prop.prepared_input` as [system message, user message]."""
        if argument.prop.raw_input:
            argument.prop.prepared_input = self._prepare_raw_input(argument)
            return
        self._validate_response_format(argument)

        system = self._build_system_message(argument)
        user_content = self._build_user_content(argument)
        user_prompt = {"role": "user", "content": user_content}
        system, user_prompt = self._apply_self_prompt_if_needed(argument, system, user_prompt)

        argument.prop.prepared_input = [
            {"role": "system", "content": system},
            user_prompt,
        ]

    def _validate_response_format(self, argument) -> None:
        """Check that a requested response_format carries a 'type' key.

        NOTE(review): `assert` is stripped under `python -O`; consider raising
        an explicit exception here instead.
        """
        if argument.prop.response_format:
            response_format = argument.prop.response_format
            assert response_format.get("type") is not None, (
                'Expected format `{ "type": "json_object" }`! We are using the OpenAI compatible '
                "API for OpenRouter."
            )

    def _build_system_message(self, argument) -> str:
        """Assemble the system prompt from contexts, payload, examples and instruction."""
        system: str = ""
        if argument.prop.suppress_verbose_output:
            system += _NON_VERBOSE_OUTPUT
        if system:
            system = f"{system}\n"

        ref = argument.prop.instance
        static_ctxt, dyn_ctxt = ref.global_context
        if len(static_ctxt) > 0:
            system += f"<STATIC CONTEXT/>\n{static_ctxt}\n\n"

        if len(dyn_ctxt) > 0:
            system += f"<DYNAMIC CONTEXT/>\n{dyn_ctxt}\n\n"

        if argument.prop.payload:
            system += f"<ADDITIONAL CONTEXT/>\n{argument.prop.payload!s}\n\n"

        examples = argument.prop.examples
        if examples and len(examples) > 0:
            system += f"<EXAMPLES/>\n{examples!s}\n\n"

        if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
            val = str(argument.prop.prompt)
            system += f"<INSTRUCTION/>\n{val}\n\n"

        # Template mode: restrict generation to the placeholder content only.
        if argument.prop.template_suffix:
            system += (
                " You will only generate content for the placeholder "
                f"`{argument.prop.template_suffix!s}` following the instructions and the provided context "
                "information.\n\n"
            )

        return system

    def _build_user_content(self, argument) -> str:
        """Return the processed input rendered as the user message content."""
        return str(argument.prop.processed_input)

    def _apply_self_prompt_if_needed(self, argument, system, user_prompt):
        """Optionally rewrite system/user prompts via the SelfPrompt component.

        Returns the (possibly rewritten) (system, user_prompt) pair.
        """
        if argument.prop.instance._kwargs.get("self_prompt", False) or argument.prop.self_prompt:
            self_prompter = SelfPrompt()
            res = self_prompter({"user": user_prompt["content"], "system": system})
            if res is None:
                UserMessage("Self-prompting failed!", raise_with=ValueError)
            return res["system"], {"role": "user", "content": res["user"]}
        return system, user_prompt

    def _process_function_calls(self, res, metadata):
        """Record the first tool/function call from the response into metadata.

        Only the first function call is honored; extra calls emit a warning
        via UserMessage. Malformed JSON arguments degrade to an empty dict.
        """
        hit = False
        if (
            hasattr(res, "choices")
            and res.choices
            and hasattr(res.choices[0], "message")
            and res.choices[0].message
            and hasattr(res.choices[0].message, "tool_calls")
            and res.choices[0].message.tool_calls
        ):
            for tool_call in res.choices[0].message.tool_calls:
                if hasattr(tool_call, "function") and tool_call.function:
                    if hit:
                        UserMessage(
                            "Multiple function calls detected in the response but only the first one will be processed."
                        )
                        break
                    try:
                        args_dict = json.loads(tool_call.function.arguments)
                    except json.JSONDecodeError:
                        args_dict = {}
                    metadata["function_call"] = {
                        "name": tool_call.function.name,
                        "arguments": args_dict,
                    }
                    hit = True
        return metadata

    # TODO: requires updates for reasoning
    def _prepare_request_payload(self, messages, argument):
        """Translate engine kwargs into an OpenAI-compatible request payload."""
        kwargs = argument.kwargs
        # Accept both the legacy `max_tokens` and the newer `max_completion_tokens` names.
        max_tokens = kwargs.get("max_tokens")
        if max_tokens is None:
            max_tokens = kwargs.get("max_completion_tokens")
        return {
            "messages": messages,
            "model": self._handle_prefix(kwargs.get("model", self.model)),
            "seed": kwargs.get("seed", self.seed),
            "max_tokens": max_tokens,
            "stop": kwargs.get("stop"),
            "temperature": kwargs.get("temperature", 1),
            "frequency_penalty": kwargs.get("frequency_penalty", 0),
            "presence_penalty": kwargs.get("presence_penalty", 0),
            "top_p": kwargs.get("top_p", 1),
            "n": kwargs.get("n", 1),
            "tools": kwargs.get("tools"),
            "tool_choice": kwargs.get("tool_choice"),
            "response_format": kwargs.get("response_format"),
            "stream": kwargs.get("stream", False),
        }
|
@@ -9,6 +9,7 @@ service disruption.
|
|
|
9
9
|
|
|
10
10
|
import io
|
|
11
11
|
import logging
|
|
12
|
+
import random
|
|
12
13
|
import re
|
|
13
14
|
from typing import Any, ClassVar
|
|
14
15
|
from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
|
|
@@ -17,7 +18,9 @@ import requests
|
|
|
17
18
|
import trafilatura
|
|
18
19
|
from bs4 import BeautifulSoup
|
|
19
20
|
from pdfminer.high_level import extract_text
|
|
21
|
+
from requests.adapters import HTTPAdapter
|
|
20
22
|
from requests.structures import CaseInsensitiveDict
|
|
23
|
+
from urllib3.util.retry import Retry
|
|
21
24
|
|
|
22
25
|
from ....symbol import Result
|
|
23
26
|
from ....utils import UserMessage
|
|
@@ -80,24 +83,49 @@ class RequestsEngine(Engine):
|
|
|
80
83
|
"none": "None",
|
|
81
84
|
}
|
|
82
85
|
|
|
83
|
-
|
|
86
|
+
# Pool of realistic desktop browser User-Agent strings (Chrome, Firefox,
# Safari, Edge across Windows/macOS/Linux). One entry is chosen at random
# per request unless an explicit override was supplied (see _get_user_agent).
USER_AGENT_POOL: ClassVar[list[str]] = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0",
    "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
]
|
|
96
|
+
|
|
97
|
+
def __init__(self, timeout=15, verify_ssl=True, user_agent=None, retries=3, backoff_factor=0.5, retry_status_codes=(500, 502, 503, 504)):
    """Configure the HTTP session used for scraping.

    Args:
        timeout: Seconds to wait for network operations before aborting.
        verify_ssl: Toggle for TLS certificate verification.
        user_agent: Optional override for user agent rotation.
        retries: Number of retries for failed requests (default: 3).
        backoff_factor: Multiplier for exponential backoff (default: 0.5).
        retry_status_codes: HTTP status codes to retry on (default: 500, 502, 503, 504).
    """
    super().__init__()
    self.timeout = timeout
    self.verify_ssl = verify_ssl
    self.name = self.__class__.__name__
    # When None, a random agent from USER_AGENT_POOL is used per request.
    self._user_agent_override = user_agent

    self.session = requests.Session()
    # Install the default headers minus User-Agent: the agent is injected
    # per request so it can rotate between calls (see _get_user_agent).
    self.session.headers.update({k: v for k, v in self.DEFAULT_HEADERS.items() if k != "User-Agent"})

    # Transparent retries with exponential backoff, limited to idempotent
    # methods, for transient server-side (5xx) failures.
    retry_strategy = Retry(
        total=retries,
        backoff_factor=backoff_factor,
        status_forcelist=retry_status_codes,
        allowed_methods=["GET", "HEAD"],
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    self.session.mount("http://", adapter)
    self.session.mount("https://", adapter)
|
|
125
|
+
|
|
126
|
+
def _get_user_agent(self) -> str:
|
|
127
|
+
"""Return user agent: override if set, otherwise random from pool."""
|
|
128
|
+
return self._user_agent_override or random.choice(self.USER_AGENT_POOL)
|
|
101
129
|
|
|
102
130
|
def _maybe_set_bypass_cookies(self, url: str):
|
|
103
131
|
netloc = urlparse(url).hostname
|
|
@@ -232,7 +260,7 @@ class RequestsEngine(Engine):
|
|
|
232
260
|
# Avoid loops
|
|
233
261
|
if target == resp.url:
|
|
234
262
|
return resp
|
|
235
|
-
return self.session.get(target, timeout=timeout, allow_redirects=True)
|
|
263
|
+
return self.session.get(target, timeout=timeout, allow_redirects=True, headers={"User-Agent": self._get_user_agent()})
|
|
236
264
|
|
|
237
265
|
def _fetch_with_playwright(
|
|
238
266
|
self,
|
|
@@ -259,7 +287,7 @@ class RequestsEngine(Engine):
|
|
|
259
287
|
|
|
260
288
|
timeout_seconds = timeout if timeout is not None else self.timeout
|
|
261
289
|
timeout_ms = max(int(timeout_seconds * 1000), 0)
|
|
262
|
-
user_agent = self.
|
|
290
|
+
user_agent = self._get_user_agent()
|
|
263
291
|
|
|
264
292
|
parsed = urlparse(url)
|
|
265
293
|
hostname = parsed.hostname or ""
|
|
@@ -348,7 +376,8 @@ class RequestsEngine(Engine):
|
|
|
348
376
|
)
|
|
349
377
|
else:
|
|
350
378
|
resp = self.session.get(
|
|
351
|
-
clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl
|
|
379
|
+
clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl,
|
|
380
|
+
headers={"User-Agent": self._get_user_agent()}
|
|
352
381
|
)
|
|
353
382
|
resp.raise_for_status()
|
|
354
383
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from .engine_firecrawl import FirecrawlEngine
|
|
2
|
+
from .engine_parallel import ParallelEngine
|
|
3
|
+
|
|
4
|
+
# Registry mapping a provider keyword to its search engine implementation.
SEARCH_ENGINE_MAPPING = dict(
    firecrawl=FirecrawlEngine,
    parallel=ParallelEngine,
)

# Names re-exported as the public API of this subpackage.
__all__ = [
    "SEARCH_ENGINE_MAPPING",
    "FirecrawlEngine",
    "ParallelEngine",
]
|