symbolicai 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
symai/__init__.py CHANGED
@@ -33,7 +33,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
  # Create singleton instance
  config_manager = settings.SymAIConfig()

- SYMAI_VERSION = "1.3.0"
+ SYMAI_VERSION = "1.5.0"
  __version__ = SYMAI_VERSION
  __root_dir__ = config_manager.config_dir

symai/backend/engines/index/engine_qdrant.py CHANGED
@@ -4,8 +4,10 @@ import tempfile
  import urllib.request
  import uuid
  import warnings
+ from dataclasses import dataclass
  from pathlib import Path
  from typing import Any
+ from urllib.parse import urlparse

  import numpy as np

@@ -148,6 +150,108 @@ Matches:
          return f"<ul>{doc_str}</ul>"


+ @dataclass
+ class Citation:
+     id: int
+     title: str
+     url: str
+     start: int
+     end: int
+
+     def __hash__(self):
+         return hash((self.url,))
+
+
+ class SearchResult(Result):
+     def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
+         super().__init__(value, **kwargs)
+         if isinstance(value, dict) and value.get("error"):
+             UserMessage(value["error"], raise_with=ValueError)
+         results = self._coerce_results(value)
+         text, citations = self._build_text_and_citations(results)
+         self._value = text
+         self._citations = citations
+
+     def _coerce_results(self, raw: Any) -> list[dict[str, Any]]:
+         if raw is None:
+             return []
+         results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
+         if not results:
+             return []
+         return [item for item in results if isinstance(item, dict)]
+
+     def _source_identifier(self, item: dict[str, Any], url: str) -> str:
+         for key in ("source_id", "sourceId", "sourceID", "id"):
+             raw = item.get(key)
+             if raw is None:
+                 continue
+             text = str(raw).strip()
+             if text:
+                 return text
+         path = Path(urlparse(url).path)
+         return path.name or path.as_posix() or url
+
+     def _build_text_and_citations(self, results: list[dict[str, Any]]):
+         pieces = []
+         citations = []
+         cursor = 0
+         cid = 1
+         separator = "\n\n---\n\n"
+
+         for item in results:
+             url = str(item.get("url") or "")
+             if not url:
+                 continue
+
+             title = str(item.get("title") or "")
+             if not title:
+                 path = Path(urlparse(url).path)
+                 title = path.name or url
+
+             excerpts = item.get("excerpts") or []
+             excerpt_parts = [ex.strip() for ex in excerpts if isinstance(ex, str) and ex.strip()]
+             if not excerpt_parts:
+                 continue
+
+             combined_excerpt = "\n\n".join(excerpt_parts)
+             source_id = self._source_identifier(item, url)
+             block_body = combined_excerpt if not source_id else f"{source_id}\n\n{combined_excerpt}"
+
+             if pieces:
+                 pieces.append(separator)
+                 cursor += len(separator)
+
+             opening_tag = "<source>\n"
+             pieces.append(opening_tag)
+             cursor += len(opening_tag)
+
+             pieces.append(block_body)
+             cursor += len(block_body)
+
+             closing_tag = "\n</source>"
+             pieces.append(closing_tag)
+             cursor += len(closing_tag)
+
+             marker = f"[{cid}]"
+             start = cursor
+             pieces.append(marker)
+             cursor += len(marker)
+
+             citations.append(Citation(id=cid, title=title or url, url=url, start=start, end=cursor))
+             cid += 1
+
+         return "".join(pieces), citations
+
+     def __str__(self) -> str:
+         return str(self._value or "")
+
+     def _repr_html_(self) -> str:
+         return f"<pre>{self._value or ''}</pre>"
+
+     def get_citations(self) -> list[Citation]:
+         return self._citations
+
+
  class QdrantIndexEngine(Engine):
      _default_url = "http://localhost:6333"
      _default_api_key = SYMAI_CONFIG.get("INDEXING_ENGINE_API_KEY", None)
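Editorial note on the block above: `SearchResult` flattens Qdrant hits into one citable string — each result's excerpts are wrapped in a `<source>` block and followed by an inline `[n]` marker whose character span is recorded in a `Citation`. A minimal sketch of how a caller could consume it (the input dict is hypothetical, shaped like the output of `_format_search_results` further below):

from symai.backend.engines.index.engine_qdrant import SearchResult

# Hypothetical hit in the {"results": [...]} shape the class expects.
hits = {
    "results": [
        {
            "url": "file:///tmp/notes.pdf",
            "title": "notes.pdf",
            "excerpts": ["Qdrant stores vectors in named collections."],
            "source_id": "chunk-42",
        }
    ]
}

result = SearchResult(hits)
text = str(result)                               # "<source>\nchunk-42\n\n...\n</source>[1]"
for c in result.get_citations():
    print(c.id, c.title, text[c.start:c.end])    # the slice is exactly the "[1]" marker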
@@ -421,15 +525,18 @@ class QdrantIndexEngine(Engine):
          kwargs["index_get"] = True
          self._configure_collection(**kwargs)

+         treat_as_search_engine = False
          if operation == "search":
              # Ensure collection exists - fail fast if it doesn't
              self._ensure_collection_exists(collection_name)
-             index_top_k = kwargs.get("index_top_k", self.index_top_k)
+             search_kwargs = dict(kwargs)
+             index_top_k = search_kwargs.pop("index_top_k", self.index_top_k)
              # Optional search parameters
-             score_threshold = kwargs.get("score_threshold")
+             score_threshold = search_kwargs.pop("score_threshold", None)
              # Accept both `query_filter` and `filter` for convenience
-             raw_filter = kwargs.get("query_filter", kwargs.get("filter"))
+             raw_filter = search_kwargs.pop("query_filter", search_kwargs.pop("filter", None))
              query_filter = self._build_query_filter(raw_filter)
+             treat_as_search_engine = bool(search_kwargs.pop("treat_as_search_engine", False))

              # Use shared search helper that already handles retries and normalization
              rsp = self._search_sync(
@@ -438,6 +545,7 @@ class QdrantIndexEngine(Engine):
                  limit=index_top_k,
                  score_threshold=score_threshold,
                  query_filter=query_filter,
+                 **search_kwargs,
              )
          elif operation == "add":
              # Create collection if it doesn't exist (only for write operations)
@@ -462,7 +570,10 @@ class QdrantIndexEngine(Engine):

          metadata = {}

-         rsp = QdrantResult(rsp, query, embedding)
+         if operation == "search" and treat_as_search_engine:
+             rsp = self._format_search_results(rsp, collection_name)
+         else:
+             rsp = QdrantResult(rsp, query, embedding)
          return [rsp], metadata

      def prepare(self, argument):
@@ -513,7 +624,33 @@ class QdrantIndexEngine(Engine):
              jitter=self.jitter,
          )
          def _func():
+             qdrant_kwargs = dict(kwargs)
              query_vector_normalized = self._normalize_vector(query_vector)
+             with_payload = qdrant_kwargs.pop("with_payload", True)
+             with_vectors = qdrant_kwargs.pop("with_vectors", self.index_values)
+             # qdrant-client `query_points` is strict about extra kwargs and will assert if any
+             # unknown arguments are provided. Because our engine `forward()` passes decorator
+             # kwargs through the stack, we must drop engine-internal fields here.
+             #
+             # Keep only kwargs that `qdrant_client.QdrantClient.query_points` accepts (besides
+             # those we pass explicitly).
+             if "filter" in qdrant_kwargs and "query_filter" not in qdrant_kwargs:
+                 # Convenience alias supported by our public API
+                 qdrant_kwargs["query_filter"] = qdrant_kwargs.pop("filter")
+
+             allowed_qdrant_kwargs = {
+                 "using",
+                 "prefetch",
+                 "query_filter",
+                 "search_params",
+                 "offset",
+                 "score_threshold",
+                 "lookup_from",
+                 "consistency",
+                 "shard_key_selector",
+                 "timeout",
+             }
+             qdrant_kwargs = {k: v for k, v in qdrant_kwargs.items() if k in allowed_qdrant_kwargs}
              # For single vector collections, pass vector directly to query parameter
              # For named vector collections, use Query(near_vector=NamedVector(name="vector_name", vector=...))
              # query_points API uses query_filter (not filter) for filtering
@@ -521,9 +658,9 @@ class QdrantIndexEngine(Engine):
                  collection_name=collection_name,
                  query=query_vector_normalized,
                  limit=top_k,
-                 with_payload=True,
-                 with_vectors=self.index_values,
-                 **kwargs,
+                 with_payload=with_payload,
+                 with_vectors=with_vectors,
+                 **qdrant_kwargs,
              )
              # query_points returns QueryResponse with .points attribute, extract it
              return response.points
@@ -860,6 +997,82 @@ class QdrantIndexEngine(Engine):
          # Use _query which handles retry logic and vector normalization
          return self._query(collection_name, query_vector, limit, **search_kwargs)

+     def _resolve_payload_url(
+         self, payload: dict[str, Any], collection_name: str, point_id: Any
+     ) -> str:
+         source = (
+             payload.get("source")
+             or payload.get("url")
+             or payload.get("file_path")
+             or payload.get("path")
+         )
+         if isinstance(source, str) and source:
+             if source.startswith(("http://", "https://", "file://")):
+                 return source
+
+             source_path = Path(source).expanduser()
+             try:
+                 resolved = source_path.resolve()
+                 if resolved.exists() or source_path.is_absolute():
+                     return resolved.as_uri()
+             except Exception:
+                 return str(source_path)
+             return str(source_path)
+
+         return f"qdrant://{collection_name}/{point_id}"
+
+     def _resolve_payload_title(self, payload: dict[str, Any], url: str, page: Any) -> str:
+         raw_title = payload.get("title")
+         if isinstance(raw_title, str) and raw_title.strip():
+             base = raw_title.strip()
+         else:
+             parsed = urlparse(url)
+             path_part = parsed.path or url
+             base = Path(path_part).stem or url
+
+         try:
+             page_int = int(page) if page is not None else None
+         except (TypeError, ValueError):
+             page_int = None
+
+         if Path(urlparse(url).path).suffix.lower() == ".pdf" and page_int is not None:
+             base = f"{base}#p{page_int}"
+
+         return base
+
+     def _format_search_results(self, points: list[ScoredPoint] | None, collection_name: str):
+         results: list[dict[str, Any]] = []
+
+         for point in points or []:
+             payload = getattr(point, "payload", {}) or {}
+             text = payload.get("text") or payload.get("content")
+             if isinstance(text, list):
+                 text = " ".join([t for t in text if isinstance(t, str)])
+             if not isinstance(text, str):
+                 continue
+             excerpt = text.strip()
+             if not excerpt:
+                 continue
+
+             page = payload.get("page") or payload.get("page_number") or payload.get("pageIndex")
+             url = self._resolve_payload_url(payload, collection_name, getattr(point, "id", ""))
+             title = self._resolve_payload_title(payload, url, page)
+
+             results.append(
+                 {
+                     "url": url,
+                     "title": title,
+                     "excerpts": [excerpt],
+                     "source_id": payload.get("source_id")
+                     or payload.get("sourceId")
+                     or payload.get("chunk_id")
+                     or payload.get("chunkId")
+                     or getattr(point, "id", None),
+                 }
+             )
+
+         return SearchResult({"results": results})
+
      async def search(
          self,
          collection_name: str,
@@ -923,7 +1136,7 @@ class QdrantIndexEngine(Engine):
          if tmp_path.exists():
              tmp_path.unlink()

-     async def chunk_and_upsert(  # noqa: C901
+     async def chunk_and_upsert(
          self,
          collection_name: str,
          text: str | Symbol | None = None,
@@ -1001,8 +1214,7 @@ class QdrantIndexEngine(Engine):
              # Add source to metadata if not already present
              if metadata is None:
                  metadata = {}
-             if "source" not in metadata:
-                 metadata["source"] = doc_path.name
+             metadata["source"] = str(doc_path.resolve())

          # Handle document_url: download and read file using FileReader
          elif document_url is not None:
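Taken together with the `forward()` changes above, passing `treat_as_search_engine=True` into a Qdrant search routes the raw points through `_format_search_results`, which relies on the payload fields shown above. A rough sketch of a point payload that would survive that formatting (all values are made up; the field names come from the code):

# Hypothetical Qdrant point payload as _format_search_results consumes it.
payload = {
    "text": "Chunked document text stored at upsert time.",  # or "content"
    "source": "/home/user/docs/report.pdf",   # resolved to a file:// URI
    "page": 3,                                 # PDFs get "#p3" appended to the title
    "chunk_id": "report-0003",                 # becomes the per-excerpt source id
}
# Points without a usable source fall back to "qdrant://<collection>/<point-id>".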
symai/backend/engines/scrape/engine_requests.py CHANGED
@@ -9,6 +9,7 @@ service disruption.

  import io
  import logging
+ import random
  import re
  from typing import Any, ClassVar
  from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
@@ -17,7 +18,9 @@ import requests
  import trafilatura
  from bs4 import BeautifulSoup
  from pdfminer.high_level import extract_text
+ from requests.adapters import HTTPAdapter
  from requests.structures import CaseInsensitiveDict
+ from urllib3.util.retry import Retry

  from ....symbol import Result
  from ....utils import UserMessage
@@ -80,24 +83,49 @@ class RequestsEngine(Engine):
          "none": "None",
      }

-     def __init__(self, timeout=15, verify_ssl=True, user_agent=None):
+     USER_AGENT_POOL: ClassVar[list[str]] = [
+         "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+         "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
+         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0",
+         "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
+         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
+         "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
+     ]
+
+     def __init__(self, timeout=15, verify_ssl=True, user_agent=None, retries=3, backoff_factor=0.5, retry_status_codes=(500, 502, 503, 504)):
          """
          Args:
              timeout: Seconds to wait for network operations before aborting.
              verify_ssl: Toggle for TLS certificate verification.
-             user_agent: Optional override for the default desktop Chrome UA.
+             user_agent: Optional override for user agent rotation.
+             retries: Number of retries for failed requests (default: 3).
+             backoff_factor: Multiplier for exponential backoff (default: 0.5).
+             retry_status_codes: HTTP status codes to retry on (default: 500, 502, 503, 504).
          """
          super().__init__()
          self.timeout = timeout
          self.verify_ssl = verify_ssl
          self.name = self.__class__.__name__
-
-         headers = dict(self.DEFAULT_HEADERS)
-         if user_agent:
-             headers["User-Agent"] = user_agent
+         self._user_agent_override = user_agent

          self.session = requests.Session()
-         self.session.headers.update(headers)
+         self.session.headers.update({k: v for k, v in self.DEFAULT_HEADERS.items() if k != "User-Agent"})
+
+         retry_strategy = Retry(
+             total=retries,
+             backoff_factor=backoff_factor,
+             status_forcelist=retry_status_codes,
+             allowed_methods=["GET", "HEAD"],
+         )
+         adapter = HTTPAdapter(max_retries=retry_strategy)
+         self.session.mount("http://", adapter)
+         self.session.mount("https://", adapter)
+
+     def _get_user_agent(self) -> str:
+         """Return user agent: override if set, otherwise random from pool."""
+         return self._user_agent_override or random.choice(self.USER_AGENT_POOL)

      def _maybe_set_bypass_cookies(self, url: str):
          netloc = urlparse(url).hostname
@@ -232,7 +260,7 @@ class RequestsEngine(Engine):
          # Avoid loops
          if target == resp.url:
              return resp
-         return self.session.get(target, timeout=timeout, allow_redirects=True)
+         return self.session.get(target, timeout=timeout, allow_redirects=True, headers={"User-Agent": self._get_user_agent()})

      def _fetch_with_playwright(
          self,
@@ -259,7 +287,7 @@ class RequestsEngine(Engine):

          timeout_seconds = timeout if timeout is not None else self.timeout
          timeout_ms = max(int(timeout_seconds * 1000), 0)
-         user_agent = self.session.headers.get("User-Agent")
+         user_agent = self._get_user_agent()

          parsed = urlparse(url)
          hostname = parsed.hostname or ""
@@ -348,7 +376,8 @@ class RequestsEngine(Engine):
              )
          else:
              resp = self.session.get(
-                 clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl
+                 clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl,
+                 headers={"User-Agent": self._get_user_agent()}
              )
          resp.raise_for_status()

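The retry and user-agent changes above are plain `requests`/`urllib3` machinery; the following standalone sketch mirrors the new defaults outside the engine (the URL and the two shortened UA strings are placeholders):

import random

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Same defaults as the new RequestsEngine.__init__ parameters.
retry_strategy = Retry(
    total=3,
    backoff_factor=0.5,                      # 0.5s, 1s, 2s, ... between attempts
    status_forcelist=(500, 502, 503, 504),
    allowed_methods=["GET", "HEAD"],
)
session = requests.Session()
adapter = HTTPAdapter(max_retries=retry_strategy)
session.mount("http://", adapter)
session.mount("https://", adapter)

# Per-request User-Agent rotation, as _get_user_agent() does.
user_agents = ["Mozilla/5.0 ... Chrome/120.0.0.0", "Mozilla/5.0 ... Firefox/121.0"]
resp = session.get(
    "https://example.com",
    timeout=15,
    headers={"User-Agent": random.choice(user_agents)},
)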
symai/backend/engines/search/__init__.py ADDED
@@ -0,0 +1,13 @@
+ from .engine_firecrawl import FirecrawlEngine
+ from .engine_parallel import ParallelEngine
+
+ SEARCH_ENGINE_MAPPING = {
+     "firecrawl": FirecrawlEngine,
+     "parallel": ParallelEngine,
+ }
+
+ __all__ = [
+     "SEARCH_ENGINE_MAPPING",
+     "FirecrawlEngine",
+     "ParallelEngine",
+ ]
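This mirrors the `ENGINE_MAPPING` pattern already used for neurosymbolic engines: resolving a search backend is a plain dict lookup, as in this small sketch (the key value is a placeholder):

from symai.backend.engines.search import SEARCH_ENGINE_MAPPING

engine_cls = SEARCH_ENGINE_MAPPING.get("firecrawl")          # -> FirecrawlEngine
if engine_cls is not None:
    engine = engine_cls(api_key="<SEARCH_ENGINE_API_KEY>")   # placeholder key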
symai/backend/engines/search/engine_firecrawl.py ADDED
@@ -0,0 +1,333 @@
+ import json
+ import logging
+ from copy import deepcopy
+ from dataclasses import dataclass
+ from typing import Any
+ from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
+
+ from firecrawl import Firecrawl
+ from firecrawl.v2.types import ScrapeOptions
+
+ from ....symbol import Result
+ from ....utils import UserMessage
+ from ...base import Engine
+ from ...settings import SYMAI_CONFIG
+
+ logging.getLogger("requests").setLevel(logging.ERROR)
+ logging.getLogger("urllib3").setLevel(logging.ERROR)
+ logging.getLogger("httpx").setLevel(logging.ERROR)
+
+ TRACKING_KEYS = {
+     "utm_source",
+     "utm_medium",
+     "utm_campaign",
+     "utm_term",
+     "utm_content",
+ }
+
+
+ @dataclass
+ class Citation:
+     id: int
+     title: str
+     url: str
+     start: int
+     end: int
+
+     def __hash__(self):
+         return hash((self.url,))
+
+
+ class FirecrawlSearchResult(Result):
+     def __init__(
+         self, value: dict[str, Any] | Any, max_chars_per_result: int | None = None, **kwargs
+     ) -> None:
+         raw_dict = value.model_dump() if hasattr(value, "model_dump") else value
+         super().__init__(raw_dict, **kwargs)
+         self._citations: list[Citation] = []
+         self._max_chars_per_result = max_chars_per_result
+         try:
+             text, citations = self._build_text_and_citations(raw_dict)
+             self._value = text
+             self._citations = citations
+         except Exception as e:
+             self._value = None
+             UserMessage(f"Failed to parse Firecrawl search response: {e}", raise_with=ValueError)
+
+     def _build_text_and_citations(self, data: dict[str, Any]) -> tuple[str, list[Citation]]:
+         results = []
+         for source in ["web", "news", "images"]:
+             source_data = data.get(source) or []
+             results.extend(source_data)
+
+         if not results:
+             return "", []
+
+         parts = []
+         citations = []
+         cursor = 0
+
+         for idx, item in enumerate(results, 1):
+             # Handle both SearchResultWeb (url/title at top level) and Document (url/title in metadata)
+             metadata = item.get("metadata") or {}
+             url = item.get("url") or metadata.get("url") or metadata.get("source_url") or ""
+             title = item.get("title") or metadata.get("title") or ""
+
+             if not url:
+                 continue
+
+             # Check if this is a scraped result (has markdown content)
+             markdown = item.get("markdown", "")
+             if markdown:
+                 content = markdown
+                 if self._max_chars_per_result and len(content) > self._max_chars_per_result:
+                     content = content[: self._max_chars_per_result] + "..."
+                 result_text = f"{title}\n{url}\n{content}"
+             else:
+                 description = (
+                     item.get("description")
+                     or item.get("snippet")
+                     or metadata.get("description")
+                     or ""
+                 )
+                 result_text = f"{title}\n{url}"
+                 if description:
+                     if self._max_chars_per_result and len(description) > self._max_chars_per_result:
+                         description = description[: self._max_chars_per_result] + "..."
+                     result_text += f"\n{description}"
+
+             if parts:
+                 parts.append("\n\n")
+                 cursor += 2
+
+             parts.append(result_text)
+             cursor += len(result_text)
+
+             marker = f"[{idx}]"
+             start = cursor
+             parts.append(marker)
+             cursor += len(marker)
+
+             citations.append(Citation(id=idx, title=title, url=url, start=start, end=cursor))
+
+         text = "".join(parts)
+         return text, citations
+
+     def __str__(self) -> str:
+         if isinstance(self._value, str) and self._value:
+             return self._value
+         try:
+             return json.dumps(self.raw, indent=2)
+         except TypeError:
+             return str(self.raw)
+
+     def _repr_html_(self) -> str:
+         if isinstance(self._value, str) and self._value:
+             return f"<pre>{self._value}</pre>"
+         try:
+             return f"<pre>{json.dumps(self.raw, indent=2)}</pre>"
+         except Exception:
+             return f"<pre>{self.raw!s}</pre>"
+
+     def get_citations(self) -> list[Citation]:
+         return self._citations
+
+
+ class FirecrawlExtractResult(Result):
+     """Result wrapper for Firecrawl scrape API responses."""
+
+     def __init__(self, value: Any, **kwargs) -> None:
+         raw_dict = value.model_dump() if hasattr(value, "model_dump") else value
+         super().__init__(raw_dict, **kwargs)
+         try:
+             self._value = self._extract_content(raw_dict)
+         except Exception as e:
+             self._value = None
+             UserMessage(f"Failed to parse Firecrawl scrape response: {e}", raise_with=ValueError)
+
+     def _extract_content(self, data: dict[str, Any]) -> str:
+         content = data.get("markdown") or data.get("html") or data.get("raw_html")
+         if content:
+             return str(content)
+         json_data = data.get("json")
+         if json_data:
+             return json.dumps(json_data, indent=2)
+         return ""
+
+     def __str__(self) -> str:
+         try:
+             return str(self._value or "")
+         except Exception:
+             return ""
+
+     def _repr_html_(self) -> str:
+         try:
+             return f"<pre>{self._value or ''}</pre>"
+         except Exception:
+             return "<pre></pre>"
+
+
+ class FirecrawlEngine(Engine):
+     def __init__(self, api_key: str | None = None):
+         super().__init__()
+         self.config = deepcopy(SYMAI_CONFIG)
+         self.api_key = api_key or self.config.get("SEARCH_ENGINE_API_KEY")
+         self.model = self.config.get("SEARCH_ENGINE_MODEL")
+         self.name = self.__class__.__name__
+
+         if not self.api_key:
+             UserMessage(
+                 "Firecrawl API key not found. Set SEARCH_ENGINE_API_KEY in config or environment.",
+                 raise_with=ValueError,
+             )
+
+         try:
+             self.client = Firecrawl(api_key=self.api_key)
+         except Exception as e:
+             UserMessage(f"Failed to initialize Firecrawl client: {e}", raise_with=ValueError)
+
+     def id(self) -> str:
+         if (
+             self.config.get("SEARCH_ENGINE_API_KEY")
+             and str(self.config.get("SEARCH_ENGINE_MODEL", "")).lower() == "firecrawl"
+         ):
+             return "search"
+         return super().id()
+
+     def command(self, *args, **kwargs):
+         super().command(*args, **kwargs)
+         if "SEARCH_ENGINE_API_KEY" in kwargs:
+             self.api_key = kwargs["SEARCH_ENGINE_API_KEY"]
+         if "SEARCH_ENGINE_MODEL" in kwargs:
+             self.model = kwargs["SEARCH_ENGINE_MODEL"]
+
+     def _normalize_url(self, url: str) -> str:
+         parts = urlsplit(url)
+         filtered_query = [
+             (k, v)
+             for k, v in parse_qsl(parts.query, keep_blank_values=True)
+             if k not in TRACKING_KEYS and not k.lower().startswith("utm_")
+         ]
+         query = urlencode(filtered_query, doseq=True)
+         return urlunsplit((parts.scheme, parts.netloc, parts.path, query, parts.fragment))
+
+     def _search(self, query: str, kwargs: dict[str, Any]):
+         if not query:
+             UserMessage(
+                 "FirecrawlEngine._search requires a non-empty query.", raise_with=ValueError
+             )
+
+         max_chars_per_result = kwargs.get("max_chars_per_result")
+
+         # Build search kwargs
+         search_kwargs = {}
+         if "limit" in kwargs:
+             search_kwargs["limit"] = kwargs["limit"]
+         if "location" in kwargs:
+             search_kwargs["location"] = kwargs["location"]
+         if "tbs" in kwargs:
+             search_kwargs["tbs"] = kwargs["tbs"]
+         if "sources" in kwargs:
+             search_kwargs["sources"] = kwargs["sources"]
+         if "categories" in kwargs:
+             search_kwargs["categories"] = kwargs["categories"]
+         if "timeout" in kwargs:
+             search_kwargs["timeout"] = kwargs["timeout"]
+
+         # Build scrape options for search results content
+         scrape_opts = {}
+         if "formats" in kwargs:
+             scrape_opts["formats"] = kwargs["formats"]
+         if "proxy" in kwargs:
+             scrape_opts["proxy"] = kwargs["proxy"]
+         if "only_main_content" in kwargs:
+             scrape_opts["only_main_content"] = kwargs["only_main_content"]
+         if "scrape_location" in kwargs:
+             scrape_opts["location"] = kwargs["scrape_location"]
+         if "include_tags" in kwargs:
+             scrape_opts["include_tags"] = kwargs["include_tags"]
+         if "exclude_tags" in kwargs:
+             scrape_opts["exclude_tags"] = kwargs["exclude_tags"]
+
+         if scrape_opts:
+             search_kwargs["scrape_options"] = ScrapeOptions(**scrape_opts)
+
+         try:
+             result = self.client.search(query, **search_kwargs)
+         except Exception as e:
+             UserMessage(f"Failed to call Firecrawl Search API: {e}", raise_with=ValueError)
+
+         raw = result.model_dump() if hasattr(result, "model_dump") else result
+         return [FirecrawlSearchResult(result, max_chars_per_result=max_chars_per_result)], {
+             "raw_output": raw
+         }
+
+     def _extract(self, url: str, kwargs: dict[str, Any]):
+         normalized_url = self._normalize_url(url)
+
+         # Build scrape kwargs
+         scrape_kwargs = {"formats": kwargs.get("formats", ["markdown"])}
+         if "only_main_content" in kwargs:
+             scrape_kwargs["only_main_content"] = kwargs["only_main_content"]
+         if "timeout" in kwargs:
+             scrape_kwargs["timeout"] = kwargs["timeout"]
+         if "proxy" in kwargs:
+             scrape_kwargs["proxy"] = kwargs["proxy"]
+         if "location" in kwargs:
+             scrape_kwargs["location"] = kwargs["location"]
+         if "max_age" in kwargs:
+             scrape_kwargs["max_age"] = kwargs["max_age"]
+         if "store_in_cache" in kwargs:
+             scrape_kwargs["store_in_cache"] = kwargs["store_in_cache"]
+         if "actions" in kwargs:
+             scrape_kwargs["actions"] = kwargs["actions"]
+         if "headers" in kwargs:
+             scrape_kwargs["headers"] = kwargs["headers"]
+         if "include_tags" in kwargs:
+             scrape_kwargs["include_tags"] = kwargs["include_tags"]
+         if "exclude_tags" in kwargs:
+             scrape_kwargs["exclude_tags"] = kwargs["exclude_tags"]
+         if "wait_for" in kwargs:
+             scrape_kwargs["wait_for"] = kwargs["wait_for"]
+         if "mobile" in kwargs:
+             scrape_kwargs["mobile"] = kwargs["mobile"]
+
+         try:
+             result = self.client.scrape(normalized_url, **scrape_kwargs)
+         except Exception as e:
+             UserMessage(f"Failed to call Firecrawl Scrape API: {e}", raise_with=ValueError)
+
+         raw = result.model_dump() if hasattr(result, "model_dump") else result
+         return [FirecrawlExtractResult(result)], {"raw_output": raw, "final_url": normalized_url}
+
+     def forward(self, argument):
+         kwargs = argument.kwargs
+         url = argument.prop.url or kwargs.get("url")
+         if url:
+             return self._extract(str(url), kwargs)
+
+         raw_query = argument.prop.prepared_input
+         if raw_query is None:
+             raw_query = argument.prop.query
+
+         query = str(raw_query or "").strip() if raw_query else ""
+         if not query:
+             UserMessage(
+                 "FirecrawlEngine.forward requires at least one non-empty query or url.",
+                 raise_with=ValueError,
+             )
+
+         return self._search(query, kwargs)
+
+     def prepare(self, argument):
+         url = argument.kwargs.get("url") or argument.prop.url
+         if url:
+             argument.prop.prepared_input = str(url)
+             return
+
+         query = argument.prop.query
+         if isinstance(query, list):
+             argument.prop.prepared_input = " ".join(str(q) for q in query if q)
+             return
+
+         argument.prop.prepared_input = str(query or "").strip()
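Like the Qdrant `SearchResult` earlier, `FirecrawlSearchResult` records each citation's character span against the composed text, so the inline `[n]` markers can be recovered by slicing. A hedged sketch, assuming `result` came from a prior `FirecrawlEngine` search call:

text = str(result)                                # composed "title\nurl\ndescription[n]" blocks
for citation in result.get_citations():
    marker = text[citation.start:citation.end]    # e.g. "[1]"
    print(f"{marker} {citation.title} -> {citation.url}")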
symai/backend/engines/search/engine_parallel.py CHANGED
@@ -66,7 +66,7 @@ class Citation:
          return hash((self.url,))


- class SearchResult(Result):
+ class ParallelSearchResult(Result):
      def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
          super().__init__(value, **kwargs)
          if isinstance(value, dict) and value.get("error"):
@@ -286,7 +286,7 @@ class SearchResult(Result):
          return self._citations


- class ExtractResult(Result):
+ class ParallelExtractResult(Result):
      """Result wrapper for Parallel Extract API responses."""

      def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
@@ -485,7 +485,7 @@ class ParallelEngine(Engine):
              )
          except Exception as e:
              UserMessage(f"Failed to call Parallel Search API: {e}", raise_with=ValueError)
-         return [SearchResult(result)], {"raw_output": result}
+         return [ParallelSearchResult(result)], {"raw_output": result}

      def _task(self, queries: list[str], kwargs: dict[str, Any]):
          processor_name = self._coerce_processor(kwargs.get("processor"))
@@ -521,7 +521,7 @@ class ParallelEngine(Engine):
          result = self._fetch_task_result(run.run_id, timeout=timeout, api_timeout=api_timeout)

          payload = self._task_result_to_search_payload(result)
-         return [SearchResult(payload)], {
+         return [ParallelSearchResult(payload)], {
              "raw_output": result,
              "task_output": payload.get("task_output"),
              "task_output_type": payload.get("task_output_type"),
@@ -699,7 +699,7 @@ class ParallelEngine(Engine):
              )
          except Exception as e:
              UserMessage(f"Failed to call Parallel Extract API: {e}", raise_with=ValueError)
-         return [ExtractResult(result)], {"raw_output": result, "final_url": url}
+         return [ParallelExtractResult(result)], {"raw_output": result, "final_url": url}

      def forward(self, argument):
          kwargs = argument.kwargs
symai/components.py CHANGED
@@ -1508,12 +1508,18 @@ class DynamicEngine(Expression):
          """Create an engine instance based on the model name."""
          # Deferred to avoid components <-> neurosymbolic engine circular imports.
          from .backend.engines.neurosymbolic import ENGINE_MAPPING  # noqa
-         from .backend.engines.neurosymbolic.engine_cerebras import CerebrasEngine  # noqa
+         from .backend.engines.search import SEARCH_ENGINE_MAPPING  # noqa

          try:
+             # Check neurosymbolic engines first
              engine_class = ENGINE_MAPPING.get(self.model)
-             if engine_class is None and self.model.startswith("cerebras:"):
-                 engine_class = CerebrasEngine
+
+             # Check search engines
+             if engine_class is None:
+                 engine_class = SEARCH_ENGINE_MAPPING.get(self.model)
+                 if engine_class is not None:
+                     return engine_class(api_key=self.api_key)

              if engine_class is None:
  return engine_class(api_key=self.api_key, model=self.model)
symai/extended/interfaces/__init__.py CHANGED
@@ -0,0 +1 @@
+
symai/extended/interfaces/firecrawl.py ADDED
@@ -0,0 +1,30 @@
+ from ... import core
+ from ...backend.engines.search.engine_firecrawl import FirecrawlExtractResult, FirecrawlSearchResult
+ from ...symbol import Expression, Symbol
+
+
+ class firecrawl(Expression):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.name = self.__class__.__name__
+
+     def search(self, query: Symbol, **kwargs) -> FirecrawlSearchResult:
+         query = self._to_symbol(query)
+
+         @core.search(query=query.value, **kwargs)
+         def _func(_) -> FirecrawlSearchResult:
+             pass
+
+         return _func(self)
+
+     def scrape(self, url: str, **kwargs) -> FirecrawlExtractResult:
+         symbol = self._to_symbol(url)
+         options = dict(kwargs)
+         options.pop("query", None)
+         options["url"] = symbol.value
+
+         @core.search(query="", **options)
+         def _func(_, *_args, **_inner_kwargs) -> FirecrawlExtractResult:
+             return None
+
+         return _func(self)
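Assuming `SEARCH_ENGINE_API_KEY` is set and `SEARCH_ENGINE_MODEL` is `firecrawl`, the new interface can be used much like the existing `parallel` interface; a sketch (query and URL are placeholders):

from symai.extended.interfaces.firecrawl import firecrawl

fc = firecrawl()
res = fc.search("qdrant hybrid search", limit=3)   # FirecrawlSearchResult
print(str(res))                                    # result text with [n] markers
print(res.get_citations())

page = fc.scrape("https://example.com/article")    # FirecrawlExtractResult
print(str(page))                                   # markdown content of the page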
symai/extended/interfaces/local_search.py ADDED
@@ -0,0 +1,57 @@
+ from typing import TYPE_CHECKING
+
+ from ...backend.engines.index.engine_qdrant import QdrantIndexEngine
+ from ...symbol import Expression, Symbol
+
+ if TYPE_CHECKING:
+     from ...backend.engines.index.engine_qdrant import SearchResult
+
+
+ class local_search(Expression):
+     def __init__(self, index_name: str = QdrantIndexEngine._default_index_name, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.index_name = index_name
+         self.name = self.__class__.__name__
+
+     def search(self, query: Symbol, **kwargs) -> "SearchResult":
+         symbol = self._to_symbol(query)
+         options = dict(kwargs)
+
+         index_name = options.pop("collection_name", options.pop("index_name", self.index_name))
+
+         # Normalize limit/top_k/index_top_k
+         index_top_k = options.pop("index_top_k", None)
+         if index_top_k is None:
+             top_k = options.pop("top_k", None)
+             limit = options.pop("limit", None)
+             index_top_k = top_k if top_k is not None else limit
+         if index_top_k is not None:
+             options["index_top_k"] = index_top_k
+
+         # Bypass decorator/EngineRepository pipeline entirely (and thus `forward()`).
+         # We query Qdrant directly and then format results into the same SearchResult
+         # structure used by `parallel.search` (citations, inline markers, etc.).
+         engine = QdrantIndexEngine(index_name=index_name)
+         try:
+             score_threshold = options.pop("score_threshold", None)
+             raw_filter = options.pop("query_filter", options.pop("filter", None))
+             query_filter = engine._build_query_filter(raw_filter)
+
+             # Keep `with_payload` default aligned with engine behavior; let caller override.
+             with_payload = options.pop("with_payload", True)
+             with_vectors = options.pop("with_vectors", False)
+
+             points = engine._search_sync(
+                 collection_name=index_name,
+                 query_vector=symbol.embedding,
+                 limit=options.pop("index_top_k", engine.index_top_k),
+                 score_threshold=score_threshold,
+                 query_filter=query_filter,
+                 with_payload=with_payload,
+                 with_vectors=with_vectors,
+                 **options,
+             )
+             result = engine._format_search_results(points, index_name)
+         finally:
+             del engine
+         return result
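A hedged usage sketch for `local_search`: it assumes a Qdrant collection (here `my_docs`, a placeholder) was already populated, e.g. via `chunk_and_upsert`, and that the query Symbol can produce an embedding for `_search_sync`:

from symai.extended.interfaces.local_search import local_search

searcher = local_search(index_name="my_docs")
res = searcher.search("what does the report say about latency?", top_k=5)
print(str(res))                        # <source> blocks with inline [n] markers
for citation in res.get_citations():
    print(citation.id, citation.url)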
symai/extended/interfaces/parallel.py CHANGED
@@ -1,5 +1,5 @@
  from ... import core
- from ...backend.engines.search.engine_parallel import ExtractResult, SearchResult
+ from ...backend.engines.search.engine_parallel import ParallelExtractResult, ParallelSearchResult
  from ...symbol import Expression, Symbol


@@ -8,23 +8,23 @@ class parallel(Expression):
          super().__init__(*args, **kwargs)
          self.name = self.__class__.__name__

-     def search(self, query: Symbol, **kwargs) -> SearchResult:
+     def search(self, query: Symbol, **kwargs) -> ParallelSearchResult:
          query = self._to_symbol(query)

          @core.search(query=query.value, **kwargs)
-         def _func(_) -> SearchResult:
+         def _func(_) -> ParallelSearchResult:
              pass

          return _func(self)

-     def scrape(self, url: str, **kwargs) -> ExtractResult:
+     def scrape(self, url: str, **kwargs) -> ParallelExtractResult:
          symbol = self._to_symbol(url)
          options = dict(kwargs)
          options.pop("query", None)
          options["url"] = symbol.value

          @core.search(query="", **options)
-         def _func(_, *_args, **_inner_kwargs) -> ExtractResult:
+         def _func(_, *_args, **_inner_kwargs) -> ParallelExtractResult:
              return None

          return _func(self)
symai/functional.py CHANGED
@@ -498,10 +498,9 @@ class EngineRepository:
      def get(engine_name: str, *_args, **_kwargs):
          self = EngineRepository()
          # First check if we're in the context manager that dynamically changes models
-         if engine_name == "neurosymbolic":
-             engine = self.get_dynamic_engine_instance()
-             if engine is not None:
-                 return engine
+         dynamic_engine = self.get_dynamic_engine_instance()
+         if dynamic_engine is not None and engine_name in ("neurosymbolic", "search"):
+             return dynamic_engine

          # Otherwise, fallback to normal lookup:
          if engine_name not in self._engines:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: symbolicai
- Version: 1.3.0
+ Version: 1.5.0
  Summary: A Neurosymbolic Perspective on Large Language Models
  Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
  License: BSD 3-Clause License
@@ -113,6 +113,7 @@ Requires-Dist: openai-whisper>=20240930; extra == "whisper"
  Requires-Dist: numba>=0.62.1; extra == "whisper"
  Requires-Dist: llvmlite>=0.45.1; extra == "whisper"
  Provides-Extra: search
+ Requires-Dist: firecrawl-py>=4.12.0; extra == "search"
  Requires-Dist: parallel-web>=0.3.3; extra == "search"
  Provides-Extra: serpapi
  Requires-Dist: google_search_results>=2.4.2; extra == "serpapi"
@@ -136,6 +137,8 @@ Requires-Dist: symbolicai[serpapi]; extra == "all"
  Requires-Dist: symbolicai[services]; extra == "all"
  Requires-Dist: symbolicai[solver]; extra == "all"
  Requires-Dist: symbolicai[qdrant]; extra == "all"
+ Provides-Extra: dev
+ Requires-Dist: pytest-asyncio>=1.3.0; extra == "dev"
  Dynamic: license-file

  # **SymbolicAI: A neuro-symbolic perspective on LLMs**
@@ -1,13 +1,13 @@
  symai/TERMS_OF_SERVICE.md,sha256=HN42UXVI_wAVDHjMShzy_k7xAsbjXaATNeMKcIte_eg,91409
- symai/__init__.py,sha256=Kdk86d3uR3kr-C7S_niPdsEbvbUu1z8pBTXb4dKe6Zs,18530
+ symai/__init__.py,sha256=qlqkm2OjRqXtKhIBltfB9zx0kBf4V4ygckH1RHVPAVE,18530
  symai/chat.py,sha256=DCEbmZ96wv-eitAVt6-oF6PT3JM3cT59Iy3r2Hucd_M,14100
- symai/components.py,sha256=s10kLvwAOjSBQQohoHGtAIKs0UHHCd_HhiRvMbNtIH0,64685
+ symai/components.py,sha256=XL1whwdZd6HCl0viUuXca_7d8no_xxfTGZsqE1hhwqI,64845
  symai/constraints.py,sha256=ljjB9p0qK4DrDl_u5G_Y-Y6WAH5ZHANIqLLxRtwcORs,1980
  symai/context.py,sha256=4M69MJOeWSdPTr2Y9teoNTs-nEvpzcAcr7900UgORXA,189
  symai/core.py,sha256=gI9qvTT0Skq2D0izdhAoN3RdwBtWei59KO52mKN1Sos,70420
  symai/core_ext.py,sha256=lS_BZNeUGmNhhXR-F3dFLF26_nZHq3NVaAwa4vAbkTQ,8937
  symai/exceptions.py,sha256=BxpxI8q3-7Uh_Kg9Xi2PhF6RR6CofxV1h8R07j4v47U,165
- symai/functional.py,sha256=C0UrpN0vJTTwS-yqLg91InjHWaQCHo6XPtxiN6wQb7c,21441
+ symai/functional.py,sha256=GqBs5FZPVZ3iVJ-MlO0Zvkf7cNSDgVhkt3tsL82kFrM,21457
  symai/imports.py,sha256=P5WsamkfKxsK3fs8vlrFpC6CIv5WVpMIMNue9DKJGnE,16126
  symai/interfaces.py,sha256=Z8CDdarnOVa67GCLljKjxQojDH9MhhPKBQFb0pi2WfY,3458
  symai/memory.py,sha256=Cd60UyeJk7SHNBWEYOLrmUXQy54GzQsu3Mjh0lfNQOY,3716
@@ -41,7 +41,7 @@ symai/backend/engines/files/engine_io.py,sha256=4eYBz44rQYWD7VO6Pn7hVF_cOnqNuolo
  symai/backend/engines/imagecaptioning/engine_blip2.py,sha256=8lTzc8sQpuNY4AUb_ZweRKr95v-sFtTykT5ennVf6g0,2915
  symai/backend/engines/imagecaptioning/engine_llavacpp_client.py,sha256=jBsLZv0Laa4tuPyX0VQ7uwyldyO3aYIbbj73WjTbceM,6793
  symai/backend/engines/index/engine_pinecone.py,sha256=fxCew1ldUdjd9UtqnMuWFDiVz5X5BUIKZtq1iSDhj28,9132
- symai/backend/engines/index/engine_qdrant.py,sha256=GtWVbgaqJuATfGus0A0h7EgM_8hKlbw3fnorNJmbC_Q,43300
+ symai/backend/engines/index/engine_qdrant.py,sha256=U9p0kzYvHE4DjFgxnvnG_8xfEoP_W4dpaBGY5gTFMF4,50994
  symai/backend/engines/index/engine_vectordb.py,sha256=xXU8QaC2BX9O4dDjDCVYgWO4PxQMpmNlhtal6UVtV0o,8541
  symai/backend/engines/lean/engine_lean4.py,sha256=ln5nbQn5szq8nRulbREPLCPQ5bwjM_A5XAGMkfzPdT8,10102
  symai/backend/engines/neurosymbolic/__init__.py,sha256=o7HUmxcYSrIkutGYB-6_Qur3adHyrkVeWroDtqEK-YE,2279
@@ -59,9 +59,11 @@ symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py,sha256=yWiCT
  symai/backend/engines/neurosymbolic/engine_openai_responses.py,sha256=J3P7WcQhxWSPK99uZuLClpIDlLRqLJFWYwDJHrBKox4,17830
  symai/backend/engines/ocr/engine_apilayer.py,sha256=UpC3oHBdSM6wlPVqxwMkemBd-Y0ReVwc270O_EVbRD0,2267
  symai/backend/engines/output/engine_stdout.py,sha256=BWNXACl5U-WYIJnT1pZNwZsTRMzP1XzA0A7o693mmyQ,899
- symai/backend/engines/scrape/engine_requests.py,sha256=yyVFT9JrZ4S6v5U_cykef-tn5iWGl1MAdpqnDaQ70TA,13821
+ symai/backend/engines/scrape/engine_requests.py,sha256=uXQ8PGeRN2OyM0_ioEI61rkv5PqSBE0wayAJNS7s8ZA,15819
+ symai/backend/engines/search/__init__.py,sha256=iW6kEBOZ-gUiPYfcIWupNgewiqLrFOBGJ643kqwQFoM,274
+ symai/backend/engines/search/engine_firecrawl.py,sha256=M_nxXBtvudNqRR4gTC5dXoJzf_9ofrMScYXzaGVTmaM,11990
  symai/backend/engines/search/engine_openai.py,sha256=hAEu3vPZzLTvgmNc4BSZDTcNb4ek4xYeOf8xgti2zRs,14248
- symai/backend/engines/search/engine_parallel.py,sha256=vhRavd_LStk6grV1aDZiHWfW9v1uDnCLX0BT8smiV84,27008
+ symai/backend/engines/search/engine_parallel.py,sha256=voMmeJZ5bf1x3pt7uxMJu84z6VLLG0-ZfgFUWvhM-vI,27048
  symai/backend/engines/search/engine_perplexity.py,sha256=rXnZjMCSiIRuJcNSchE58-f9zWJmYpkKMHONF_XwGnk,4100
  symai/backend/engines/search/engine_serpapi.py,sha256=ZJJBnEDoLjkpxWt_o4vFZanwqojH8ZFBWmWNnEaIbww,3618
  symai/backend/engines/speech_to_text/engine_local_whisper.py,sha256=EOUh2GCeEhZ2Av72i_AZ4NSj9e46Pl7Ft6sIErFy6FI,8387
@@ -100,21 +102,23 @@ symai/extended/solver.py,sha256=Men8FcGlUdUHJCw0lb1rKAwLOGp5-d5Rnuf2sx5Q6PM,1173
  symai/extended/summarizer.py,sha256=x7yKOU-tXmvHZxmyKrPoy5_Dy9-Zet1oAcDK8uvQSRI,1052
  symai/extended/taypan_interpreter.py,sha256=yPIcI-NcpNpfDb3r3KiclP9XwzvFo_enoZOgK1JM3NI,4832
  symai/extended/vectordb.py,sha256=npCR9WBfV6RN3OQZuJAELpwz1sM6q1btKqrVaW5jPvs,13546
- symai/extended/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ symai/extended/interfaces/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
  symai/extended/interfaces/blip_2.py,sha256=wZYVzql6w_OJMUZc1c2BKx3LHrlapRprx-Q6p99_qxE,463
  symai/extended/interfaces/clip.py,sha256=l6vjEq3cF-wDX9cRPulyiKpDFQB8QI2609GcGtvqt7U,514
  symai/extended/interfaces/console.py,sha256=qeAnG80f95ArADjfpk57AaDA1cHUQSkaUrau2zGNSKs,637
  symai/extended/interfaces/dall_e.py,sha256=SSF1K17SzA-lpdHVtsfHbwRCP6XJxWqsNdXoWwcBYjw,551
  symai/extended/interfaces/file.py,sha256=1_BXHKsHm78MmBeRolA_fFWFTLuA6on7Le-ZF4S_1ds,457
+ symai/extended/interfaces/firecrawl.py,sha256=hGA5WxiW6EN5LNsfBSlsYzASgvz9e515TWrHGHcE21s,955
  symai/extended/interfaces/flux.py,sha256=LTY_I9UtIxnh3Nc4cBPQhQ6upB6CVZIhc1uOnFpxEIo,532
  symai/extended/interfaces/gpt_image.py,sha256=Jk5-9og440eZeRAhKmjdyhwP22wX58q0NcFuVhIFWZQ,718
  symai/extended/interfaces/input.py,sha256=CFMLf2j_a-rZ1ApaEwfgqZmWVS7_1yj_u6iiqtiOGPs,456
  symai/extended/interfaces/llava.py,sha256=yCItfGYSk35RazhEfHR4R324h-R6W5DjZYeJBonDkRU,433
+ symai/extended/interfaces/local_search.py,sha256=AHHRsYCUm4VttGSl_HAk5kpH34e0x_uzvgy1OXSubSs,2408
  symai/extended/interfaces/naive_scrape.py,sha256=KPjTSBXSCr5zwHwIPgF-VwLSTD2OjVcL4xALNX4l9-4,682
  symai/extended/interfaces/naive_vectordb.py,sha256=fm7DBMYYnSx7Ma7eNnCmuOVyQwNGnkiDR31oV-qNrJA,1348
  symai/extended/interfaces/ocr.py,sha256=MMxgp8ZKoM44doJPZzzrBVh2VxChs6faFu2uFYnbzfU,563
  symai/extended/interfaces/openai_search.py,sha256=UvnSihdfIwybrLDz2A-yt92aklHEHIvh0pt0hp1Dpis,528
- symai/extended/interfaces/parallel.py,sha256=3QL3B-HJd1mCd1XsV8Ha_63TQZi-rlA0OJjUXB3p3UU,899
+ symai/extended/interfaces/parallel.py,sha256=kWRcrs_vTPvZDDhKjl1Hp94ltZeiYH7K8l9zOy5jd-I,947
  symai/extended/interfaces/perplexity.py,sha256=vSUl8CfBsFhFrzxws9Lf8WgfhsoPatJf7eYRfihKRG4,529
  symai/extended/interfaces/pinecone.py,sha256=NA2t1pNQf-G-HSeewEO8jqGnitD3huBV5bucIM9vgi4,1075
  symai/extended/interfaces/python.py,sha256=EcxXQwrlhjGOS5SkRoa_cVt069vu_INDD9DIfbnUses,418
@@ -162,9 +166,9 @@ symai/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
  symai/server/huggingface_server.py,sha256=wSAVqFiKQsCu5UB2YYVpxJBhJ7GgQBBfePxNi265yP8,9039
  symai/server/llama_cpp_server.py,sha256=-WPTNB2cbnwtnpES4AtPM__MCasDKl83jr94JGS9tmI,2144
  symai/server/qdrant_server.py,sha256=l4r4rz29c7cO1dapXO0LQ4sHW4WF44keuz7j8v5azMc,9854
- symbolicai-1.3.0.dist-info/licenses/LICENSE,sha256=9vRFudlJ1ghVfra5lcCUIYQCqnZSYcBLjLHbGRsrQCs,1505
- symbolicai-1.3.0.dist-info/METADATA,sha256=7xQnG02ro-9f-haIsGjx5yMXdarGnRDFujSSaLg1gCU,23603
- symbolicai-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- symbolicai-1.3.0.dist-info/entry_points.txt,sha256=JV5sdydIfUZdDF6QBEQHiZHod6XNPjCjpWQrXh7gTAw,261
- symbolicai-1.3.0.dist-info/top_level.txt,sha256=bOoIDfpDIvCQtQgXcwVKJvxAKwsxpxo2IL4z92rNJjw,6
- symbolicai-1.3.0.dist-info/RECORD,,
+ symbolicai-1.5.0.dist-info/licenses/LICENSE,sha256=9vRFudlJ1ghVfra5lcCUIYQCqnZSYcBLjLHbGRsrQCs,1505
+ symbolicai-1.5.0.dist-info/METADATA,sha256=gQLPEUb1pW2VPNqCtgN-WcXeSQnfUJAWx0KTAN3vnJw,23731
+ symbolicai-1.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ symbolicai-1.5.0.dist-info/entry_points.txt,sha256=JV5sdydIfUZdDF6QBEQHiZHod6XNPjCjpWQrXh7gTAw,261
+ symbolicai-1.5.0.dist-info/top_level.txt,sha256=bOoIDfpDIvCQtQgXcwVKJvxAKwsxpxo2IL4z92rNJjw,6
+ symbolicai-1.5.0.dist-info/RECORD,,