symbolicai 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
symai/__init__.py CHANGED
@@ -33,7 +33,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
  # Create singleton instance
  config_manager = settings.SymAIConfig()

- SYMAI_VERSION = "1.4.0"
+ SYMAI_VERSION = "1.5.0"
  __version__ = SYMAI_VERSION
  __root_dir__ = config_manager.config_dir

symai/backend/engines/scrape/engine_requests.py CHANGED
@@ -9,6 +9,7 @@ service disruption.

  import io
  import logging
+ import random
  import re
  from typing import Any, ClassVar
  from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
@@ -17,7 +18,9 @@ import requests
  import trafilatura
  from bs4 import BeautifulSoup
  from pdfminer.high_level import extract_text
+ from requests.adapters import HTTPAdapter
  from requests.structures import CaseInsensitiveDict
+ from urllib3.util.retry import Retry

  from ....symbol import Result
  from ....utils import UserMessage
@@ -80,24 +83,49 @@ class RequestsEngine(Engine):
  "none": "None",
  }

- def __init__(self, timeout=15, verify_ssl=True, user_agent=None):
+ USER_AGENT_POOL: ClassVar[list[str]] = [
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0",
+ "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
+ ]
+
+ def __init__(self, timeout=15, verify_ssl=True, user_agent=None, retries=3, backoff_factor=0.5, retry_status_codes=(500, 502, 503, 504)):
  """
  Args:
  timeout: Seconds to wait for network operations before aborting.
  verify_ssl: Toggle for TLS certificate verification.
- user_agent: Optional override for the default desktop Chrome UA.
+ user_agent: Optional override for user agent rotation.
+ retries: Number of retries for failed requests (default: 3).
+ backoff_factor: Multiplier for exponential backoff (default: 0.5).
+ retry_status_codes: HTTP status codes to retry on (default: 500, 502, 503, 504).
  """
  super().__init__()
  self.timeout = timeout
  self.verify_ssl = verify_ssl
  self.name = self.__class__.__name__
-
- headers = dict(self.DEFAULT_HEADERS)
- if user_agent:
- headers["User-Agent"] = user_agent
+ self._user_agent_override = user_agent

  self.session = requests.Session()
- self.session.headers.update(headers)
+ self.session.headers.update({k: v for k, v in self.DEFAULT_HEADERS.items() if k != "User-Agent"})
+
+ retry_strategy = Retry(
+ total=retries,
+ backoff_factor=backoff_factor,
+ status_forcelist=retry_status_codes,
+ allowed_methods=["GET", "HEAD"],
+ )
+ adapter = HTTPAdapter(max_retries=retry_strategy)
+ self.session.mount("http://", adapter)
+ self.session.mount("https://", adapter)
+
+ def _get_user_agent(self) -> str:
+ """Return user agent: override if set, otherwise random from pool."""
+ return self._user_agent_override or random.choice(self.USER_AGENT_POOL)

  def _maybe_set_bypass_cookies(self, url: str):
  netloc = urlparse(url).hostname
@@ -232,7 +260,7 @@ class RequestsEngine(Engine):
  # Avoid loops
  if target == resp.url:
  return resp
- return self.session.get(target, timeout=timeout, allow_redirects=True)
+ return self.session.get(target, timeout=timeout, allow_redirects=True, headers={"User-Agent": self._get_user_agent()})

  def _fetch_with_playwright(
  self,
@@ -259,7 +287,7 @@ class RequestsEngine(Engine):

  timeout_seconds = timeout if timeout is not None else self.timeout
  timeout_ms = max(int(timeout_seconds * 1000), 0)
- user_agent = self.session.headers.get("User-Agent")
+ user_agent = self._get_user_agent()

  parsed = urlparse(url)
  hostname = parsed.hostname or ""
@@ -348,7 +376,8 @@ class RequestsEngine(Engine):
  )
  else:
  resp = self.session.get(
- clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl
+ clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl,
+ headers={"User-Agent": self._get_user_agent()}
  )
  resp.raise_for_status()

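The new constructor options wire urllib3's Retry into the shared requests.Session, and _get_user_agent() picks a header per request. A minimal usage sketch, assuming only the module path listed in this wheel's RECORD, with placeholder values:

    from symai.backend.engines.scrape.engine_requests import RequestsEngine

    # Retries with exponential backoff are handled by the mounted HTTPAdapter;
    # each GET sends a User-Agent drawn from USER_AGENT_POOL unless overridden.
    engine = RequestsEngine(
        timeout=10,
        retries=5,                    # retry GET/HEAD on 500/502/503/504
        backoff_factor=1.0,           # exponential delay between attempts
        user_agent="my-crawler/1.0",  # hypothetical override; disables rotation
    )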
symai/backend/engines/search/__init__.py ADDED
@@ -0,0 +1,13 @@
+ from .engine_firecrawl import FirecrawlEngine
+ from .engine_parallel import ParallelEngine
+
+ SEARCH_ENGINE_MAPPING = {
+ "firecrawl": FirecrawlEngine,
+ "parallel": ParallelEngine,
+ }
+
+ __all__ = [
+ "SEARCH_ENGINE_MAPPING",
+ "FirecrawlEngine",
+ "ParallelEngine",
+ ]
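This registry mirrors ENGINE_MAPPING for the neurosymbolic backends. A sketch of the name-to-class lookup it enables (the same pattern DynamicEngine uses further down in this diff; the API key is a placeholder):

    from symai.backend.engines.search import SEARCH_ENGINE_MAPPING

    engine_class = SEARCH_ENGINE_MAPPING.get("firecrawl")   # -> FirecrawlEngine
    if engine_class is not None:
        engine = engine_class(api_key="fc-...")             # placeholder API key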
symai/backend/engines/search/engine_firecrawl.py ADDED
@@ -0,0 +1,333 @@
+ import json
+ import logging
+ from copy import deepcopy
+ from dataclasses import dataclass
+ from typing import Any
+ from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
+
+ from firecrawl import Firecrawl
+ from firecrawl.v2.types import ScrapeOptions
+
+ from ....symbol import Result
+ from ....utils import UserMessage
+ from ...base import Engine
+ from ...settings import SYMAI_CONFIG
+
+ logging.getLogger("requests").setLevel(logging.ERROR)
+ logging.getLogger("urllib3").setLevel(logging.ERROR)
+ logging.getLogger("httpx").setLevel(logging.ERROR)
+
+ TRACKING_KEYS = {
+ "utm_source",
+ "utm_medium",
+ "utm_campaign",
+ "utm_term",
+ "utm_content",
+ }
+
+
+ @dataclass
+ class Citation:
+ id: int
+ title: str
+ url: str
+ start: int
+ end: int
+
+ def __hash__(self):
+ return hash((self.url,))
+
+
+ class FirecrawlSearchResult(Result):
+ def __init__(
+ self, value: dict[str, Any] | Any, max_chars_per_result: int | None = None, **kwargs
+ ) -> None:
+ raw_dict = value.model_dump() if hasattr(value, "model_dump") else value
+ super().__init__(raw_dict, **kwargs)
+ self._citations: list[Citation] = []
+ self._max_chars_per_result = max_chars_per_result
+ try:
+ text, citations = self._build_text_and_citations(raw_dict)
+ self._value = text
+ self._citations = citations
+ except Exception as e:
+ self._value = None
+ UserMessage(f"Failed to parse Firecrawl search response: {e}", raise_with=ValueError)
+
+ def _build_text_and_citations(self, data: dict[str, Any]) -> tuple[str, list[Citation]]:
+ results = []
+ for source in ["web", "news", "images"]:
+ source_data = data.get(source) or []
+ results.extend(source_data)
+
+ if not results:
+ return "", []
+
+ parts = []
+ citations = []
+ cursor = 0
+
+ for idx, item in enumerate(results, 1):
+ # Handle both SearchResultWeb (url/title at top level) and Document (url/title in metadata)
+ metadata = item.get("metadata") or {}
+ url = item.get("url") or metadata.get("url") or metadata.get("source_url") or ""
+ title = item.get("title") or metadata.get("title") or ""
+
+ if not url:
+ continue
+
+ # Check if this is a scraped result (has markdown content)
+ markdown = item.get("markdown", "")
+ if markdown:
+ content = markdown
+ if self._max_chars_per_result and len(content) > self._max_chars_per_result:
+ content = content[: self._max_chars_per_result] + "..."
+ result_text = f"{title}\n{url}\n{content}"
+ else:
+ description = (
+ item.get("description")
+ or item.get("snippet")
+ or metadata.get("description")
+ or ""
+ )
+ result_text = f"{title}\n{url}"
+ if description:
+ if self._max_chars_per_result and len(description) > self._max_chars_per_result:
+ description = description[: self._max_chars_per_result] + "..."
+ result_text += f"\n{description}"
+
+ if parts:
+ parts.append("\n\n")
+ cursor += 2
+
+ parts.append(result_text)
+ cursor += len(result_text)
+
+ marker = f"[{idx}]"
+ start = cursor
+ parts.append(marker)
+ cursor += len(marker)
+
+ citations.append(Citation(id=idx, title=title, url=url, start=start, end=cursor))
+
+ text = "".join(parts)
+ return text, citations
+
+ def __str__(self) -> str:
+ if isinstance(self._value, str) and self._value:
+ return self._value
+ try:
+ return json.dumps(self.raw, indent=2)
+ except TypeError:
+ return str(self.raw)
+
+ def _repr_html_(self) -> str:
+ if isinstance(self._value, str) and self._value:
+ return f"<pre>{self._value}</pre>"
+ try:
+ return f"<pre>{json.dumps(self.raw, indent=2)}</pre>"
+ except Exception:
+ return f"<pre>{self.raw!s}</pre>"
+
+ def get_citations(self) -> list[Citation]:
+ return self._citations
+
+
+ class FirecrawlExtractResult(Result):
+ """Result wrapper for Firecrawl scrape API responses."""
+
+ def __init__(self, value: Any, **kwargs) -> None:
+ raw_dict = value.model_dump() if hasattr(value, "model_dump") else value
+ super().__init__(raw_dict, **kwargs)
+ try:
+ self._value = self._extract_content(raw_dict)
+ except Exception as e:
+ self._value = None
+ UserMessage(f"Failed to parse Firecrawl scrape response: {e}", raise_with=ValueError)
+
+ def _extract_content(self, data: dict[str, Any]) -> str:
+ content = data.get("markdown") or data.get("html") or data.get("raw_html")
+ if content:
+ return str(content)
+ json_data = data.get("json")
+ if json_data:
+ return json.dumps(json_data, indent=2)
+ return ""
+
+ def __str__(self) -> str:
+ try:
+ return str(self._value or "")
+ except Exception:
+ return ""
+
+ def _repr_html_(self) -> str:
+ try:
+ return f"<pre>{self._value or ''}</pre>"
+ except Exception:
+ return "<pre></pre>"
+
+
+ class FirecrawlEngine(Engine):
+ def __init__(self, api_key: str | None = None):
+ super().__init__()
+ self.config = deepcopy(SYMAI_CONFIG)
+ self.api_key = api_key or self.config.get("SEARCH_ENGINE_API_KEY")
+ self.model = self.config.get("SEARCH_ENGINE_MODEL")
+ self.name = self.__class__.__name__
+
+ if not self.api_key:
+ UserMessage(
+ "Firecrawl API key not found. Set SEARCH_ENGINE_API_KEY in config or environment.",
+ raise_with=ValueError,
+ )
+
+ try:
+ self.client = Firecrawl(api_key=self.api_key)
+ except Exception as e:
+ UserMessage(f"Failed to initialize Firecrawl client: {e}", raise_with=ValueError)
+
+ def id(self) -> str:
+ if (
+ self.config.get("SEARCH_ENGINE_API_KEY")
+ and str(self.config.get("SEARCH_ENGINE_MODEL", "")).lower() == "firecrawl"
+ ):
+ return "search"
+ return super().id()
+
+ def command(self, *args, **kwargs):
+ super().command(*args, **kwargs)
+ if "SEARCH_ENGINE_API_KEY" in kwargs:
+ self.api_key = kwargs["SEARCH_ENGINE_API_KEY"]
+ if "SEARCH_ENGINE_MODEL" in kwargs:
+ self.model = kwargs["SEARCH_ENGINE_MODEL"]
+
+ def _normalize_url(self, url: str) -> str:
+ parts = urlsplit(url)
+ filtered_query = [
+ (k, v)
+ for k, v in parse_qsl(parts.query, keep_blank_values=True)
+ if k not in TRACKING_KEYS and not k.lower().startswith("utm_")
+ ]
+ query = urlencode(filtered_query, doseq=True)
+ return urlunsplit((parts.scheme, parts.netloc, parts.path, query, parts.fragment))
+
+ def _search(self, query: str, kwargs: dict[str, Any]):
+ if not query:
+ UserMessage(
+ "FirecrawlEngine._search requires a non-empty query.", raise_with=ValueError
+ )
+
+ max_chars_per_result = kwargs.get("max_chars_per_result")
+
+ # Build search kwargs
+ search_kwargs = {}
+ if "limit" in kwargs:
+ search_kwargs["limit"] = kwargs["limit"]
+ if "location" in kwargs:
+ search_kwargs["location"] = kwargs["location"]
+ if "tbs" in kwargs:
+ search_kwargs["tbs"] = kwargs["tbs"]
+ if "sources" in kwargs:
+ search_kwargs["sources"] = kwargs["sources"]
+ if "categories" in kwargs:
+ search_kwargs["categories"] = kwargs["categories"]
+ if "timeout" in kwargs:
+ search_kwargs["timeout"] = kwargs["timeout"]
+
+ # Build scrape options for search results content
+ scrape_opts = {}
+ if "formats" in kwargs:
+ scrape_opts["formats"] = kwargs["formats"]
+ if "proxy" in kwargs:
+ scrape_opts["proxy"] = kwargs["proxy"]
+ if "only_main_content" in kwargs:
+ scrape_opts["only_main_content"] = kwargs["only_main_content"]
+ if "scrape_location" in kwargs:
+ scrape_opts["location"] = kwargs["scrape_location"]
+ if "include_tags" in kwargs:
+ scrape_opts["include_tags"] = kwargs["include_tags"]
+ if "exclude_tags" in kwargs:
+ scrape_opts["exclude_tags"] = kwargs["exclude_tags"]
+
+ if scrape_opts:
+ search_kwargs["scrape_options"] = ScrapeOptions(**scrape_opts)
+
+ try:
+ result = self.client.search(query, **search_kwargs)
+ except Exception as e:
+ UserMessage(f"Failed to call Firecrawl Search API: {e}", raise_with=ValueError)
+
+ raw = result.model_dump() if hasattr(result, "model_dump") else result
+ return [FirecrawlSearchResult(result, max_chars_per_result=max_chars_per_result)], {
+ "raw_output": raw
+ }
+
+ def _extract(self, url: str, kwargs: dict[str, Any]):
+ normalized_url = self._normalize_url(url)
+
+ # Build scrape kwargs
+ scrape_kwargs = {"formats": kwargs.get("formats", ["markdown"])}
+ if "only_main_content" in kwargs:
+ scrape_kwargs["only_main_content"] = kwargs["only_main_content"]
+ if "timeout" in kwargs:
+ scrape_kwargs["timeout"] = kwargs["timeout"]
+ if "proxy" in kwargs:
+ scrape_kwargs["proxy"] = kwargs["proxy"]
+ if "location" in kwargs:
+ scrape_kwargs["location"] = kwargs["location"]
+ if "max_age" in kwargs:
+ scrape_kwargs["max_age"] = kwargs["max_age"]
+ if "store_in_cache" in kwargs:
+ scrape_kwargs["store_in_cache"] = kwargs["store_in_cache"]
+ if "actions" in kwargs:
+ scrape_kwargs["actions"] = kwargs["actions"]
+ if "headers" in kwargs:
+ scrape_kwargs["headers"] = kwargs["headers"]
+ if "include_tags" in kwargs:
+ scrape_kwargs["include_tags"] = kwargs["include_tags"]
+ if "exclude_tags" in kwargs:
+ scrape_kwargs["exclude_tags"] = kwargs["exclude_tags"]
+ if "wait_for" in kwargs:
+ scrape_kwargs["wait_for"] = kwargs["wait_for"]
+ if "mobile" in kwargs:
+ scrape_kwargs["mobile"] = kwargs["mobile"]
+
+ try:
+ result = self.client.scrape(normalized_url, **scrape_kwargs)
+ except Exception as e:
+ UserMessage(f"Failed to call Firecrawl Scrape API: {e}", raise_with=ValueError)
+
+ raw = result.model_dump() if hasattr(result, "model_dump") else result
+ return [FirecrawlExtractResult(result)], {"raw_output": raw, "final_url": normalized_url}
+
+ def forward(self, argument):
+ kwargs = argument.kwargs
+ url = argument.prop.url or kwargs.get("url")
+ if url:
+ return self._extract(str(url), kwargs)
+
+ raw_query = argument.prop.prepared_input
+ if raw_query is None:
+ raw_query = argument.prop.query
+
+ query = str(raw_query or "").strip() if raw_query else ""
+ if not query:
+ UserMessage(
+ "FirecrawlEngine.forward requires at least one non-empty query or url.",
+ raise_with=ValueError,
+ )
+
+ return self._search(query, kwargs)
+
+ def prepare(self, argument):
+ url = argument.kwargs.get("url") or argument.prop.url
+ if url:
+ argument.prop.prepared_input = str(url)
+ return
+
+ query = argument.prop.query
+ if isinstance(query, list):
+ argument.prop.prepared_input = " ".join(str(q) for q in query if q)
+ return
+
+ argument.prop.prepared_input = str(query or "").strip()
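For illustration only, a hedged sketch of driving the new engine directly; in normal use it is resolved through the engine registry when SEARCH_ENGINE_MODEL is "firecrawl", and _extract/_search are internal methods, so treat this as a reading aid rather than a supported API:

    from symai.backend.engines.search.engine_firecrawl import FirecrawlEngine

    engine = FirecrawlEngine(api_key="fc-...")   # placeholder key
    results, meta = engine._extract("https://example.com/post?utm_source=x", {})
    print(meta["final_url"])       # utm_* parameters stripped by _normalize_url
    print(str(results[0])[:200])   # markdown from FirecrawlExtractResult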
symai/backend/engines/search/engine_parallel.py CHANGED
@@ -66,7 +66,7 @@ class Citation:
  return hash((self.url,))


- class SearchResult(Result):
+ class ParallelSearchResult(Result):
  def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
  super().__init__(value, **kwargs)
  if isinstance(value, dict) and value.get("error"):
@@ -286,7 +286,7 @@ class SearchResult(Result):
  return self._citations


- class ExtractResult(Result):
+ class ParallelExtractResult(Result):
  """Result wrapper for Parallel Extract API responses."""

  def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
@@ -485,7 +485,7 @@ class ParallelEngine(Engine):
  )
  except Exception as e:
  UserMessage(f"Failed to call Parallel Search API: {e}", raise_with=ValueError)
- return [SearchResult(result)], {"raw_output": result}
+ return [ParallelSearchResult(result)], {"raw_output": result}

  def _task(self, queries: list[str], kwargs: dict[str, Any]):
  processor_name = self._coerce_processor(kwargs.get("processor"))
@@ -521,7 +521,7 @@ class ParallelEngine(Engine):
  result = self._fetch_task_result(run.run_id, timeout=timeout, api_timeout=api_timeout)

  payload = self._task_result_to_search_payload(result)
- return [SearchResult(payload)], {
+ return [ParallelSearchResult(payload)], {
  "raw_output": result,
  "task_output": payload.get("task_output"),
  "task_output_type": payload.get("task_output_type"),
@@ -699,7 +699,7 @@ class ParallelEngine(Engine):
  )
  except Exception as e:
  UserMessage(f"Failed to call Parallel Extract API: {e}", raise_with=ValueError)
- return [ExtractResult(result)], {"raw_output": result, "final_url": url}
+ return [ParallelExtractResult(result)], {"raw_output": result, "final_url": url}

  def forward(self, argument):
  kwargs = argument.kwargs
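The diff shows no compatibility aliases for the old names, so downstream imports presumably need the Parallel* prefix after this release, roughly:

    # 1.4.0:
    # from symai.backend.engines.search.engine_parallel import ExtractResult, SearchResult
    # 1.5.0:
    from symai.backend.engines.search.engine_parallel import (
        ParallelExtractResult,
        ParallelSearchResult,
    )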
symai/components.py CHANGED
@@ -1508,12 +1508,18 @@ class DynamicEngine(Expression):
  """Create an engine instance based on the model name."""
  # Deferred to avoid components <-> neurosymbolic engine circular imports.
  from .backend.engines.neurosymbolic import ENGINE_MAPPING # noqa
- from .backend.engines.neurosymbolic.engine_cerebras import CerebrasEngine # noqa
+ from .backend.engines.search import SEARCH_ENGINE_MAPPING # noqa

  try:
+ # Check neurosymbolic engines first
  engine_class = ENGINE_MAPPING.get(self.model)
- if engine_class is None and self.model.startswith("cerebras:"):
- engine_class = CerebrasEngine
+
+ # Check search engines
+ if engine_class is None:
+ engine_class = SEARCH_ENGINE_MAPPING.get(self.model)
+ if engine_class is not None:
+ return engine_class(api_key=self.api_key)
+
  if engine_class is None:
  UserMessage(f"Unsupported model '{self.model}'", raise_with=ValueError)
  return engine_class(api_key=self.api_key, model=self.model)
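The resolution order above is: neurosymbolic ENGINE_MAPPING first, then SEARCH_ENGINE_MAPPING, with search engines constructed from the api_key alone. A condensed sketch of that fall-through (names taken from this diff):

    from symai.backend.engines.neurosymbolic import ENGINE_MAPPING
    from symai.backend.engines.search import SEARCH_ENGINE_MAPPING

    def resolve_engine_class(model: str):
        # Neurosymbolic models take precedence; "firecrawl"/"parallel" fall through here.
        return ENGINE_MAPPING.get(model) or SEARCH_ENGINE_MAPPING.get(model)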
symai/extended/interfaces/firecrawl.py ADDED
@@ -0,0 +1,30 @@
+ from ... import core
+ from ...backend.engines.search.engine_firecrawl import FirecrawlExtractResult, FirecrawlSearchResult
+ from ...symbol import Expression, Symbol
+
+
+ class firecrawl(Expression):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.name = self.__class__.__name__
+
+ def search(self, query: Symbol, **kwargs) -> FirecrawlSearchResult:
+ query = self._to_symbol(query)
+
+ @core.search(query=query.value, **kwargs)
+ def _func(_) -> FirecrawlSearchResult:
+ pass
+
+ return _func(self)
+
+ def scrape(self, url: str, **kwargs) -> FirecrawlExtractResult:
+ symbol = self._to_symbol(url)
+ options = dict(kwargs)
+ options.pop("query", None)
+ options["url"] = symbol.value
+
+ @core.search(query="", **options)
+ def _func(_, *_args, **_inner_kwargs) -> FirecrawlExtractResult:
+ return None
+
+ return _func(self)
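A hedged usage sketch for this interface, mirroring the existing parallel interface shown below (query and URL are placeholders):

    from symai.extended.interfaces.firecrawl import firecrawl

    fc = firecrawl()
    hits = fc.search("neurosymbolic programming")   # FirecrawlSearchResult
    for citation in hits.get_citations():
        print(citation.id, citation.title, citation.url)
    page = fc.scrape("https://example.com/docs")    # FirecrawlExtractResult (markdown)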
symai/extended/interfaces/parallel.py CHANGED
@@ -1,5 +1,5 @@
  from ... import core
- from ...backend.engines.search.engine_parallel import ExtractResult, SearchResult
+ from ...backend.engines.search.engine_parallel import ParallelExtractResult, ParallelSearchResult
  from ...symbol import Expression, Symbol


@@ -8,23 +8,23 @@ class parallel(Expression):
  super().__init__(*args, **kwargs)
  self.name = self.__class__.__name__

- def search(self, query: Symbol, **kwargs) -> SearchResult:
+ def search(self, query: Symbol, **kwargs) -> ParallelSearchResult:
  query = self._to_symbol(query)

  @core.search(query=query.value, **kwargs)
- def _func(_) -> SearchResult:
+ def _func(_) -> ParallelSearchResult:
  pass

  return _func(self)

- def scrape(self, url: str, **kwargs) -> ExtractResult:
+ def scrape(self, url: str, **kwargs) -> ParallelExtractResult:
  symbol = self._to_symbol(url)
  options = dict(kwargs)
  options.pop("query", None)
  options["url"] = symbol.value

  @core.search(query="", **options)
- def _func(_, *_args, **_inner_kwargs) -> ExtractResult:
+ def _func(_, *_args, **_inner_kwargs) -> ParallelExtractResult:
  return None

  return _func(self)
symai/functional.py CHANGED
@@ -498,10 +498,9 @@ class EngineRepository:
  def get(engine_name: str, *_args, **_kwargs):
  self = EngineRepository()
  # First check if we're in the context manager that dynamically changes models
- if engine_name == "neurosymbolic":
- engine = self.get_dynamic_engine_instance()
- if engine is not None:
- return engine
+ dynamic_engine = self.get_dynamic_engine_instance()
+ if dynamic_engine is not None and engine_name in ("neurosymbolic", "search"):
+ return dynamic_engine

  # Otherwise, fallback to normal lookup:
  if engine_name not in self._engines:
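With this change an active dynamic engine is returned for both the "neurosymbolic" and the "search" registry keys, not just the former. Illustratively, and assuming EngineRepository.get is called as in the hunk above:

    from symai.functional import EngineRepository

    # While a dynamic-engine context is active, both lookups return that same instance.
    nsy_engine = EngineRepository.get("neurosymbolic")
    search_engine = EngineRepository.get("search")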
symbolicai-1.5.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: symbolicai
- Version: 1.4.0
+ Version: 1.5.0
  Summary: A Neurosymbolic Perspective on Large Language Models
  Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
  License: BSD 3-Clause License
@@ -113,6 +113,7 @@ Requires-Dist: openai-whisper>=20240930; extra == "whisper"
  Requires-Dist: numba>=0.62.1; extra == "whisper"
  Requires-Dist: llvmlite>=0.45.1; extra == "whisper"
  Provides-Extra: search
+ Requires-Dist: firecrawl-py>=4.12.0; extra == "search"
  Requires-Dist: parallel-web>=0.3.3; extra == "search"
  Provides-Extra: serpapi
  Requires-Dist: google_search_results>=2.4.2; extra == "serpapi"
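The Firecrawl client therefore ships behind the optional search extra (e.g. pip install "symbolicai[search]"). A small guard sketch for code that wants to fail early when the extra is missing:

    import importlib.util

    if importlib.util.find_spec("firecrawl") is None:   # module provided by firecrawl-py
        raise RuntimeError('Firecrawl support requires: pip install "symbolicai[search]"')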
symbolicai-1.5.0.dist-info/RECORD CHANGED
@@ -1,13 +1,13 @@
  symai/TERMS_OF_SERVICE.md,sha256=HN42UXVI_wAVDHjMShzy_k7xAsbjXaATNeMKcIte_eg,91409
- symai/__init__.py,sha256=s7UwW7LIsUjcCHapKxUsO0MhWH-98vO3gPUhsNxOZW8,18530
+ symai/__init__.py,sha256=qlqkm2OjRqXtKhIBltfB9zx0kBf4V4ygckH1RHVPAVE,18530
  symai/chat.py,sha256=DCEbmZ96wv-eitAVt6-oF6PT3JM3cT59Iy3r2Hucd_M,14100
- symai/components.py,sha256=s10kLvwAOjSBQQohoHGtAIKs0UHHCd_HhiRvMbNtIH0,64685
+ symai/components.py,sha256=XL1whwdZd6HCl0viUuXca_7d8no_xxfTGZsqE1hhwqI,64845
  symai/constraints.py,sha256=ljjB9p0qK4DrDl_u5G_Y-Y6WAH5ZHANIqLLxRtwcORs,1980
  symai/context.py,sha256=4M69MJOeWSdPTr2Y9teoNTs-nEvpzcAcr7900UgORXA,189
  symai/core.py,sha256=gI9qvTT0Skq2D0izdhAoN3RdwBtWei59KO52mKN1Sos,70420
  symai/core_ext.py,sha256=lS_BZNeUGmNhhXR-F3dFLF26_nZHq3NVaAwa4vAbkTQ,8937
  symai/exceptions.py,sha256=BxpxI8q3-7Uh_Kg9Xi2PhF6RR6CofxV1h8R07j4v47U,165
- symai/functional.py,sha256=C0UrpN0vJTTwS-yqLg91InjHWaQCHo6XPtxiN6wQb7c,21441
+ symai/functional.py,sha256=GqBs5FZPVZ3iVJ-MlO0Zvkf7cNSDgVhkt3tsL82kFrM,21457
  symai/imports.py,sha256=P5WsamkfKxsK3fs8vlrFpC6CIv5WVpMIMNue9DKJGnE,16126
  symai/interfaces.py,sha256=Z8CDdarnOVa67GCLljKjxQojDH9MhhPKBQFb0pi2WfY,3458
  symai/memory.py,sha256=Cd60UyeJk7SHNBWEYOLrmUXQy54GzQsu3Mjh0lfNQOY,3716
@@ -59,9 +59,11 @@ symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py,sha256=yWiCT
  symai/backend/engines/neurosymbolic/engine_openai_responses.py,sha256=J3P7WcQhxWSPK99uZuLClpIDlLRqLJFWYwDJHrBKox4,17830
  symai/backend/engines/ocr/engine_apilayer.py,sha256=UpC3oHBdSM6wlPVqxwMkemBd-Y0ReVwc270O_EVbRD0,2267
  symai/backend/engines/output/engine_stdout.py,sha256=BWNXACl5U-WYIJnT1pZNwZsTRMzP1XzA0A7o693mmyQ,899
- symai/backend/engines/scrape/engine_requests.py,sha256=yyVFT9JrZ4S6v5U_cykef-tn5iWGl1MAdpqnDaQ70TA,13821
+ symai/backend/engines/scrape/engine_requests.py,sha256=uXQ8PGeRN2OyM0_ioEI61rkv5PqSBE0wayAJNS7s8ZA,15819
+ symai/backend/engines/search/__init__.py,sha256=iW6kEBOZ-gUiPYfcIWupNgewiqLrFOBGJ643kqwQFoM,274
+ symai/backend/engines/search/engine_firecrawl.py,sha256=M_nxXBtvudNqRR4gTC5dXoJzf_9ofrMScYXzaGVTmaM,11990
  symai/backend/engines/search/engine_openai.py,sha256=hAEu3vPZzLTvgmNc4BSZDTcNb4ek4xYeOf8xgti2zRs,14248
- symai/backend/engines/search/engine_parallel.py,sha256=vhRavd_LStk6grV1aDZiHWfW9v1uDnCLX0BT8smiV84,27008
+ symai/backend/engines/search/engine_parallel.py,sha256=voMmeJZ5bf1x3pt7uxMJu84z6VLLG0-ZfgFUWvhM-vI,27048
  symai/backend/engines/search/engine_perplexity.py,sha256=rXnZjMCSiIRuJcNSchE58-f9zWJmYpkKMHONF_XwGnk,4100
  symai/backend/engines/search/engine_serpapi.py,sha256=ZJJBnEDoLjkpxWt_o4vFZanwqojH8ZFBWmWNnEaIbww,3618
  symai/backend/engines/speech_to_text/engine_local_whisper.py,sha256=EOUh2GCeEhZ2Av72i_AZ4NSj9e46Pl7Ft6sIErFy6FI,8387
@@ -106,6 +108,7 @@ symai/extended/interfaces/clip.py,sha256=l6vjEq3cF-wDX9cRPulyiKpDFQB8QI2609GcGtv
  symai/extended/interfaces/console.py,sha256=qeAnG80f95ArADjfpk57AaDA1cHUQSkaUrau2zGNSKs,637
  symai/extended/interfaces/dall_e.py,sha256=SSF1K17SzA-lpdHVtsfHbwRCP6XJxWqsNdXoWwcBYjw,551
  symai/extended/interfaces/file.py,sha256=1_BXHKsHm78MmBeRolA_fFWFTLuA6on7Le-ZF4S_1ds,457
+ symai/extended/interfaces/firecrawl.py,sha256=hGA5WxiW6EN5LNsfBSlsYzASgvz9e515TWrHGHcE21s,955
  symai/extended/interfaces/flux.py,sha256=LTY_I9UtIxnh3Nc4cBPQhQ6upB6CVZIhc1uOnFpxEIo,532
  symai/extended/interfaces/gpt_image.py,sha256=Jk5-9og440eZeRAhKmjdyhwP22wX58q0NcFuVhIFWZQ,718
  symai/extended/interfaces/input.py,sha256=CFMLf2j_a-rZ1ApaEwfgqZmWVS7_1yj_u6iiqtiOGPs,456
@@ -115,7 +118,7 @@ symai/extended/interfaces/naive_scrape.py,sha256=KPjTSBXSCr5zwHwIPgF-VwLSTD2OjVc
  symai/extended/interfaces/naive_vectordb.py,sha256=fm7DBMYYnSx7Ma7eNnCmuOVyQwNGnkiDR31oV-qNrJA,1348
  symai/extended/interfaces/ocr.py,sha256=MMxgp8ZKoM44doJPZzzrBVh2VxChs6faFu2uFYnbzfU,563
  symai/extended/interfaces/openai_search.py,sha256=UvnSihdfIwybrLDz2A-yt92aklHEHIvh0pt0hp1Dpis,528
- symai/extended/interfaces/parallel.py,sha256=3QL3B-HJd1mCd1XsV8Ha_63TQZi-rlA0OJjUXB3p3UU,899
+ symai/extended/interfaces/parallel.py,sha256=kWRcrs_vTPvZDDhKjl1Hp94ltZeiYH7K8l9zOy5jd-I,947
  symai/extended/interfaces/perplexity.py,sha256=vSUl8CfBsFhFrzxws9Lf8WgfhsoPatJf7eYRfihKRG4,529
  symai/extended/interfaces/pinecone.py,sha256=NA2t1pNQf-G-HSeewEO8jqGnitD3huBV5bucIM9vgi4,1075
  symai/extended/interfaces/python.py,sha256=EcxXQwrlhjGOS5SkRoa_cVt069vu_INDD9DIfbnUses,418
@@ -163,9 +166,9 @@ symai/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
  symai/server/huggingface_server.py,sha256=wSAVqFiKQsCu5UB2YYVpxJBhJ7GgQBBfePxNi265yP8,9039
  symai/server/llama_cpp_server.py,sha256=-WPTNB2cbnwtnpES4AtPM__MCasDKl83jr94JGS9tmI,2144
  symai/server/qdrant_server.py,sha256=l4r4rz29c7cO1dapXO0LQ4sHW4WF44keuz7j8v5azMc,9854
- symbolicai-1.4.0.dist-info/licenses/LICENSE,sha256=9vRFudlJ1ghVfra5lcCUIYQCqnZSYcBLjLHbGRsrQCs,1505
- symbolicai-1.4.0.dist-info/METADATA,sha256=dlAY-AhPA52x_fmXU-i7h6rA-M1Mf0qJ00OeOUyplGs,23676
- symbolicai-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- symbolicai-1.4.0.dist-info/entry_points.txt,sha256=JV5sdydIfUZdDF6QBEQHiZHod6XNPjCjpWQrXh7gTAw,261
- symbolicai-1.4.0.dist-info/top_level.txt,sha256=bOoIDfpDIvCQtQgXcwVKJvxAKwsxpxo2IL4z92rNJjw,6
- symbolicai-1.4.0.dist-info/RECORD,,
+ symbolicai-1.5.0.dist-info/licenses/LICENSE,sha256=9vRFudlJ1ghVfra5lcCUIYQCqnZSYcBLjLHbGRsrQCs,1505
+ symbolicai-1.5.0.dist-info/METADATA,sha256=gQLPEUb1pW2VPNqCtgN-WcXeSQnfUJAWx0KTAN3vnJw,23731
+ symbolicai-1.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ symbolicai-1.5.0.dist-info/entry_points.txt,sha256=JV5sdydIfUZdDF6QBEQHiZHod6XNPjCjpWQrXh7gTAw,261
+ symbolicai-1.5.0.dist-info/top_level.txt,sha256=bOoIDfpDIvCQtQgXcwVKJvxAKwsxpxo2IL4z92rNJjw,6
+ symbolicai-1.5.0.dist-info/RECORD,,