symbolicai 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. symai/__init__.py +21 -71
  2. symai/backend/base.py +0 -26
  3. symai/backend/engines/drawing/engine_gemini_image.py +101 -0
  4. symai/backend/engines/embedding/engine_openai.py +11 -8
  5. symai/backend/engines/neurosymbolic/__init__.py +8 -0
  6. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +14 -1
  7. symai/backend/engines/neurosymbolic/engine_openrouter.py +294 -0
  8. symai/backend/engines/scrape/engine_requests.py +39 -10
  9. symai/backend/engines/search/__init__.py +13 -0
  10. symai/backend/engines/search/engine_firecrawl.py +333 -0
  11. symai/backend/engines/search/engine_parallel.py +5 -5
  12. symai/backend/mixin/__init__.py +4 -0
  13. symai/backend/mixin/openrouter.py +2 -0
  14. symai/components.py +212 -16
  15. symai/extended/interfaces/firecrawl.py +30 -0
  16. symai/extended/interfaces/nanobanana.py +23 -0
  17. symai/extended/interfaces/parallel.py +5 -5
  18. symai/functional.py +3 -4
  19. symai/interfaces.py +2 -0
  20. symai/ops/primitives.py +0 -18
  21. symai/shellsv.py +2 -7
  22. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/METADATA +3 -9
  23. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/RECORD +27 -47
  24. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/WHEEL +1 -1
  25. symai/backend/driver/webclient.py +0 -217
  26. symai/backend/engines/crawler/engine_selenium.py +0 -94
  27. symai/backend/engines/drawing/engine_dall_e.py +0 -131
  28. symai/backend/engines/embedding/engine_plugin_embeddings.py +0 -12
  29. symai/backend/engines/experiments/engine_bard_wrapper.py +0 -131
  30. symai/backend/engines/experiments/engine_gptfinetuner.py +0 -32
  31. symai/backend/engines/experiments/engine_llamacpp_completion.py +0 -142
  32. symai/backend/engines/neurosymbolic/engine_openai_gptX_completion.py +0 -277
  33. symai/collect/__init__.py +0 -8
  34. symai/collect/dynamic.py +0 -117
  35. symai/collect/pipeline.py +0 -156
  36. symai/collect/stats.py +0 -434
  37. symai/extended/crawler.py +0 -21
  38. symai/extended/interfaces/selenium.py +0 -18
  39. symai/extended/interfaces/vectordb.py +0 -21
  40. symai/extended/personas/__init__.py +0 -3
  41. symai/extended/personas/builder.py +0 -105
  42. symai/extended/personas/dialogue.py +0 -126
  43. symai/extended/personas/persona.py +0 -154
  44. symai/extended/personas/research/__init__.py +0 -1
  45. symai/extended/personas/research/yann_lecun.py +0 -62
  46. symai/extended/personas/sales/__init__.py +0 -1
  47. symai/extended/personas/sales/erik_james.py +0 -62
  48. symai/extended/personas/student/__init__.py +0 -1
  49. symai/extended/personas/student/max_tenner.py +0 -51
  50. symai/extended/strategies/__init__.py +0 -1
  51. symai/extended/strategies/cot.py +0 -40
  52. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/entry_points.txt +0 -0
  53. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/licenses/LICENSE +0 -0
  54. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,294 @@
1
+ import json
2
+ import logging
3
+ from copy import deepcopy
4
+
5
+ import openai
6
+
7
+ from ....components import SelfPrompt
8
+ from ....core_ext import retry
9
+ from ....utils import UserMessage
10
+ from ...base import Engine
11
+ from ...settings import SYMAI_CONFIG
12
+
13
+ logging.getLogger("openai").setLevel(logging.ERROR)
14
+ logging.getLogger("requests").setLevel(logging.ERROR)
15
+ logging.getLogger("urllib").setLevel(logging.ERROR)
16
+ logging.getLogger("httpx").setLevel(logging.ERROR)
17
+ logging.getLogger("httpcore").setLevel(logging.ERROR)
18
+
19
+
20
+ _NON_VERBOSE_OUTPUT = (
21
+ "<META_INSTRUCTION/>\n"
22
+ "You do not output anything else, like verbose preambles or post explanation, such as "
23
+ '"Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. '
24
+ "Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use "
25
+ "indentation, etc. Never add meta instructions information to your output!\n\n"
26
+ )
27
+
28
+
29
+ class OpenRouterEngine(Engine):
30
+ def __init__(self, api_key: str | None = None, model: str | None = None):
31
+ super().__init__()
32
+ self.config = deepcopy(SYMAI_CONFIG)
33
+ # In case we use EngineRepository.register to inject the api_key and model => dynamically change the engine at runtime
34
+ if api_key is not None and model is not None:
35
+ self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] = api_key
36
+ self.config["NEUROSYMBOLIC_ENGINE_MODEL"] = model
37
+ if self.id() != "neurosymbolic":
38
+ return # do not initialize if not neurosymbolic; avoids conflict with llama.cpp check in EngineRepository.register_from_package
39
+ openai.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]
40
+ self.model = self.config["NEUROSYMBOLIC_ENGINE_MODEL"]
41
+ self.seed = None
42
+ self.name = self.__class__.__name__
43
+ self._last_prompt_tokens = None
44
+ self._last_messages = None
45
+
46
+ try:
47
+ self.client = openai.OpenAI(
48
+ api_key=openai.api_key, base_url="https://openrouter.ai/api/v1"
49
+ )
50
+ except Exception as exc:
51
+ UserMessage(
52
+ f"Failed to initialize OpenRouter client. Please check your OpenAI library version. Caused by: {exc}",
53
+ raise_with=ValueError,
54
+ )
55
+
56
+ def id(self) -> str:
57
+ model_name = self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
58
+ if model_name and model_name.startswith("openrouter"):
59
+ return "neurosymbolic"
60
+ return super().id()
61
+
62
+ def command(self, *args, **kwargs):
63
+ super().command(*args, **kwargs)
64
+ if "NEUROSYMBOLIC_ENGINE_API_KEY" in kwargs:
65
+ openai.api_key = kwargs["NEUROSYMBOLIC_ENGINE_API_KEY"]
66
+ if "NEUROSYMBOLIC_ENGINE_MODEL" in kwargs:
67
+ self.model = kwargs["NEUROSYMBOLIC_ENGINE_MODEL"]
68
+ if "seed" in kwargs:
69
+ self.seed = kwargs["seed"]
70
+
71
+ def compute_required_tokens(self, messages):
72
+ if self._last_prompt_tokens is not None and self._last_messages == messages:
73
+ return self._last_prompt_tokens
74
+ UserMessage(
75
+ "Token counting not implemented for this engine.", raise_with=NotImplementedError
76
+ )
77
+ return 0
78
+
79
+ def compute_remaining_tokens(self, _prompts: list) -> int:
80
+ UserMessage(
81
+ "Token counting not implemented for this engine.", raise_with=NotImplementedError
82
+ )
83
+
84
+ def _handle_prefix(self, model_name: str) -> str:
85
+ """Handle prefix for model name."""
86
+ return model_name.replace("openrouter:", "")
87
+
88
+ def _extract_thinking_content(self, output: list[str]) -> tuple[str | None, list[str]]:
89
+ """Extract thinking content from textual output using <think>...</think> tags if present."""
90
+ if not output or not output[0]:
91
+ return None, output
92
+
93
+ content = output[0]
94
+ start = content.find("<think>")
95
+ if start == -1:
96
+ return None, output
97
+
98
+ end = content.find("</think>", start + 7)
99
+ if end == -1:
100
+ return None, output
101
+
102
+ thinking_content = content[start + 7 : end].strip() or None
103
+ cleaned_content = (content[:start] + content[end + 8 :]).strip()
104
+ cleaned_output = [cleaned_content, *output[1:]]
105
+
106
+ return thinking_content, cleaned_output
107
+
108
+ # cumulative wait time is < 30s
109
+ @retry(tries=8, delay=0.5, backoff=1.5, max_delay=5, jitter=(0, 0.5))
110
+ def forward(self, argument):
111
+ kwargs = argument.kwargs
112
+ messages = argument.prop.prepared_input
113
+ payload = self._prepare_request_payload(messages, argument)
114
+ except_remedy = kwargs.get("except_remedy")
115
+
116
+ try:
117
+ res = self.client.chat.completions.create(**payload)
118
+ except Exception as exc:
119
+ if openai.api_key is None or openai.api_key == "":
120
+ msg = (
121
+ "OpenRouter API key is not set. Please set it in the config file or "
122
+ "pass it as an argument to the command method."
123
+ )
124
+ UserMessage(msg)
125
+ if (
126
+ self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] is None
127
+ or self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] == ""
128
+ ):
129
+ UserMessage(msg, raise_with=ValueError)
130
+ openai.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]
131
+
132
+ callback = self.client.chat.completions.create
133
+ kwargs["model"] = (
134
+ self._handle_prefix(kwargs["model"])
135
+ if "model" in kwargs
136
+ else self._handle_prefix(self.model)
137
+ )
138
+
139
+ if except_remedy is not None:
140
+ res = except_remedy(self, exc, callback, argument)
141
+ else:
142
+ UserMessage(f"Error during generation. Caused by: {exc}", raise_with=ValueError)
143
+
144
+ prompt_tokens = getattr(res.usage, "prompt_tokens", None)
145
+ if prompt_tokens is None:
146
+ prompt_tokens = getattr(res.usage, "input_tokens", None)
147
+ self._last_prompt_tokens = prompt_tokens
148
+ self._last_messages = messages
149
+
150
+ metadata = {"raw_output": res}
151
+ if payload.get("tools"):
152
+ metadata = self._process_function_calls(res, metadata)
153
+
154
+ output = [r.message.content for r in res.choices]
155
+ thinking, output = self._extract_thinking_content(output)
156
+ if thinking:
157
+ metadata["thinking"] = thinking
158
+
159
+ return output, metadata
160
+
161
+ def _prepare_raw_input(self, argument):
162
+ if not argument.prop.processed_input:
163
+ UserMessage(
164
+ "Need to provide a prompt instruction to the engine if raw_input is enabled.",
165
+ raise_with=ValueError,
166
+ )
167
+ value = argument.prop.processed_input
168
+ if not isinstance(value, list):
169
+ if not isinstance(value, dict):
170
+ value = {"role": "user", "content": str(value)}
171
+ value = [value]
172
+ return value
173
+
174
+ def prepare(self, argument):
175
+ if argument.prop.raw_input:
176
+ argument.prop.prepared_input = self._prepare_raw_input(argument)
177
+ return
178
+ self._validate_response_format(argument)
179
+
180
+ system = self._build_system_message(argument)
181
+ user_content = self._build_user_content(argument)
182
+ user_prompt = {"role": "user", "content": user_content}
183
+ system, user_prompt = self._apply_self_prompt_if_needed(argument, system, user_prompt)
184
+
185
+ argument.prop.prepared_input = [
186
+ {"role": "system", "content": system},
187
+ user_prompt,
188
+ ]
189
+
190
+ def _validate_response_format(self, argument) -> None:
191
+ if argument.prop.response_format:
192
+ response_format = argument.prop.response_format
193
+ assert response_format.get("type") is not None, (
194
+ 'Expected format `{ "type": "json_object" }`! We are using the OpenAI compatible '
195
+ "API for OpenRouter."
196
+ )
197
+
198
+ def _build_system_message(self, argument) -> str:
199
+ system: str = ""
200
+ if argument.prop.suppress_verbose_output:
201
+ system += _NON_VERBOSE_OUTPUT
202
+ if system:
203
+ system = f"{system}\n"
204
+
205
+ ref = argument.prop.instance
206
+ static_ctxt, dyn_ctxt = ref.global_context
207
+ if len(static_ctxt) > 0:
208
+ system += f"<STATIC CONTEXT/>\n{static_ctxt}\n\n"
209
+
210
+ if len(dyn_ctxt) > 0:
211
+ system += f"<DYNAMIC CONTEXT/>\n{dyn_ctxt}\n\n"
212
+
213
+ if argument.prop.payload:
214
+ system += f"<ADDITIONAL CONTEXT/>\n{argument.prop.payload!s}\n\n"
215
+
216
+ examples = argument.prop.examples
217
+ if examples and len(examples) > 0:
218
+ system += f"<EXAMPLES/>\n{examples!s}\n\n"
219
+
220
+ if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
221
+ val = str(argument.prop.prompt)
222
+ system += f"<INSTRUCTION/>\n{val}\n\n"
223
+
224
+ if argument.prop.template_suffix:
225
+ system += (
226
+ " You will only generate content for the placeholder "
227
+ f"`{argument.prop.template_suffix!s}` following the instructions and the provided context "
228
+ "information.\n\n"
229
+ )
230
+
231
+ return system
232
+
233
+ def _build_user_content(self, argument) -> str:
234
+ return str(argument.prop.processed_input)
235
+
236
+ def _apply_self_prompt_if_needed(self, argument, system, user_prompt):
237
+ if argument.prop.instance._kwargs.get("self_prompt", False) or argument.prop.self_prompt:
238
+ self_prompter = SelfPrompt()
239
+ res = self_prompter({"user": user_prompt["content"], "system": system})
240
+ if res is None:
241
+ UserMessage("Self-prompting failed!", raise_with=ValueError)
242
+ return res["system"], {"role": "user", "content": res["user"]}
243
+ return system, user_prompt
244
+
245
+ def _process_function_calls(self, res, metadata):
246
+ hit = False
247
+ if (
248
+ hasattr(res, "choices")
249
+ and res.choices
250
+ and hasattr(res.choices[0], "message")
251
+ and res.choices[0].message
252
+ and hasattr(res.choices[0].message, "tool_calls")
253
+ and res.choices[0].message.tool_calls
254
+ ):
255
+ for tool_call in res.choices[0].message.tool_calls:
256
+ if hasattr(tool_call, "function") and tool_call.function:
257
+ if hit:
258
+ UserMessage(
259
+ "Multiple function calls detected in the response but only the first one will be processed."
260
+ )
261
+ break
262
+ try:
263
+ args_dict = json.loads(tool_call.function.arguments)
264
+ except json.JSONDecodeError:
265
+ args_dict = {}
266
+ metadata["function_call"] = {
267
+ "name": tool_call.function.name,
268
+ "arguments": args_dict,
269
+ }
270
+ hit = True
271
+ return metadata
272
+
273
+ # TODO: requires updates for reasoning
274
+ def _prepare_request_payload(self, messages, argument):
275
+ kwargs = argument.kwargs
276
+ max_tokens = kwargs.get("max_tokens")
277
+ if max_tokens is None:
278
+ max_tokens = kwargs.get("max_completion_tokens")
279
+ return {
280
+ "messages": messages,
281
+ "model": self._handle_prefix(kwargs.get("model", self.model)),
282
+ "seed": kwargs.get("seed", self.seed),
283
+ "max_tokens": max_tokens,
284
+ "stop": kwargs.get("stop"),
285
+ "temperature": kwargs.get("temperature", 1),
286
+ "frequency_penalty": kwargs.get("frequency_penalty", 0),
287
+ "presence_penalty": kwargs.get("presence_penalty", 0),
288
+ "top_p": kwargs.get("top_p", 1),
289
+ "n": kwargs.get("n", 1),
290
+ "tools": kwargs.get("tools"),
291
+ "tool_choice": kwargs.get("tool_choice"),
292
+ "response_format": kwargs.get("response_format"),
293
+ "stream": kwargs.get("stream", False),
294
+ }
@@ -9,6 +9,7 @@ service disruption.
9
9
 
10
10
  import io
11
11
  import logging
12
+ import random
12
13
  import re
13
14
  from typing import Any, ClassVar
14
15
  from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
@@ -17,7 +18,9 @@ import requests
17
18
  import trafilatura
18
19
  from bs4 import BeautifulSoup
19
20
  from pdfminer.high_level import extract_text
21
+ from requests.adapters import HTTPAdapter
20
22
  from requests.structures import CaseInsensitiveDict
23
+ from urllib3.util.retry import Retry
21
24
 
22
25
  from ....symbol import Result
23
26
  from ....utils import UserMessage
@@ -80,24 +83,49 @@ class RequestsEngine(Engine):
80
83
  "none": "None",
81
84
  }
82
85
 
83
- def __init__(self, timeout=15, verify_ssl=True, user_agent=None):
86
+ USER_AGENT_POOL: ClassVar[list[str]] = [
87
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
88
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
89
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
90
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
91
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0",
92
+ "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
93
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
94
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
95
+ ]
96
+
97
+ def __init__(self, timeout=15, verify_ssl=True, user_agent=None, retries=3, backoff_factor=0.5, retry_status_codes=(500, 502, 503, 504)):
84
98
  """
85
99
  Args:
86
100
  timeout: Seconds to wait for network operations before aborting.
87
101
  verify_ssl: Toggle for TLS certificate verification.
88
- user_agent: Optional override for the default desktop Chrome UA.
102
+ user_agent: Optional override for user agent rotation.
103
+ retries: Number of retries for failed requests (default: 3).
104
+ backoff_factor: Multiplier for exponential backoff (default: 0.5).
105
+ retry_status_codes: HTTP status codes to retry on (default: 500, 502, 503, 504).
89
106
  """
90
107
  super().__init__()
91
108
  self.timeout = timeout
92
109
  self.verify_ssl = verify_ssl
93
110
  self.name = self.__class__.__name__
94
-
95
- headers = dict(self.DEFAULT_HEADERS)
96
- if user_agent:
97
- headers["User-Agent"] = user_agent
111
+ self._user_agent_override = user_agent
98
112
 
99
113
  self.session = requests.Session()
100
- self.session.headers.update(headers)
114
+ self.session.headers.update({k: v for k, v in self.DEFAULT_HEADERS.items() if k != "User-Agent"})
115
+
116
+ retry_strategy = Retry(
117
+ total=retries,
118
+ backoff_factor=backoff_factor,
119
+ status_forcelist=retry_status_codes,
120
+ allowed_methods=["GET", "HEAD"],
121
+ )
122
+ adapter = HTTPAdapter(max_retries=retry_strategy)
123
+ self.session.mount("http://", adapter)
124
+ self.session.mount("https://", adapter)
125
+
126
+ def _get_user_agent(self) -> str:
127
+ """Return user agent: override if set, otherwise random from pool."""
128
+ return self._user_agent_override or random.choice(self.USER_AGENT_POOL)
101
129
 
102
130
  def _maybe_set_bypass_cookies(self, url: str):
103
131
  netloc = urlparse(url).hostname
@@ -232,7 +260,7 @@ class RequestsEngine(Engine):
232
260
  # Avoid loops
233
261
  if target == resp.url:
234
262
  return resp
235
- return self.session.get(target, timeout=timeout, allow_redirects=True)
263
+ return self.session.get(target, timeout=timeout, allow_redirects=True, headers={"User-Agent": self._get_user_agent()})
236
264
 
237
265
  def _fetch_with_playwright(
238
266
  self,
@@ -259,7 +287,7 @@ class RequestsEngine(Engine):
259
287
 
260
288
  timeout_seconds = timeout if timeout is not None else self.timeout
261
289
  timeout_ms = max(int(timeout_seconds * 1000), 0)
262
- user_agent = self.session.headers.get("User-Agent")
290
+ user_agent = self._get_user_agent()
263
291
 
264
292
  parsed = urlparse(url)
265
293
  hostname = parsed.hostname or ""
@@ -348,7 +376,8 @@ class RequestsEngine(Engine):
348
376
  )
349
377
  else:
350
378
  resp = self.session.get(
351
- clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl
379
+ clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl,
380
+ headers={"User-Agent": self._get_user_agent()}
352
381
  )
353
382
  resp.raise_for_status()
354
383
 
@@ -0,0 +1,13 @@
1
+ from .engine_firecrawl import FirecrawlEngine
2
+ from .engine_parallel import ParallelEngine
3
+
4
+ SEARCH_ENGINE_MAPPING = {
5
+ "firecrawl": FirecrawlEngine,
6
+ "parallel": ParallelEngine,
7
+ }
8
+
9
+ __all__ = [
10
+ "SEARCH_ENGINE_MAPPING",
11
+ "FirecrawlEngine",
12
+ "ParallelEngine",
13
+ ]