symbolicai 0.19.0__py3-none-any.whl → 0.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +1 -1
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +2 -1
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +2 -1
- symai/backend/engines/search/engine_openai.py +69 -5
- symai/backend/engines/webscraping/engine_requests.py +208 -13
- symai/backend/mixin/anthropic.py +7 -1
- symai/extended/interfaces/naive_webscraping.py +4 -2
- symai/menu/screen.py +9 -6
- {symbolicai-0.19.0.dist-info → symbolicai-0.20.1.dist-info}/METADATA +35 -2
- {symbolicai-0.19.0.dist-info → symbolicai-0.20.1.dist-info}/RECORD +14 -13
- symbolicai-0.20.1.dist-info/licenses/LICENSE +28 -0
- {symbolicai-0.19.0.dist-info → symbolicai-0.20.1.dist-info}/WHEEL +0 -0
- {symbolicai-0.19.0.dist-info → symbolicai-0.20.1.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.19.0.dist-info → symbolicai-0.20.1.dist-info}/top_level.txt +0 -0
symai/__init__.py
CHANGED
|
@@ -56,7 +56,8 @@ class ClaudeXChatEngine(Engine, AnthropicMixin):
|
|
|
56
56
|
self.config.get('NEUROSYMBOLIC_ENGINE_MODEL').startswith('claude') and \
|
|
57
57
|
('3-7' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
|
|
58
58
|
'4-0' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
|
|
59
|
-
'4-1' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')
|
|
59
|
+
'4-1' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
|
|
60
|
+
'4-5' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
|
|
60
61
|
return 'neurosymbolic'
|
|
61
62
|
return super().id() # default to unregistered
|
|
62
63
|
|
|
@@ -57,7 +57,8 @@ class ClaudeXReasoningEngine(Engine, AnthropicMixin):
|
|
|
57
57
|
self.config.get('NEUROSYMBOLIC_ENGINE_MODEL').startswith('claude') and \
|
|
58
58
|
('3-7' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
|
|
59
59
|
'4-0' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
|
|
60
|
-
'4-1' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')
|
|
60
|
+
'4-1' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
|
|
61
|
+
'4-5' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
|
|
61
62
|
return 'neurosymbolic'
|
|
62
63
|
return super().id() # default to unregistered
|
|
63
64
|
|
|
@@ -230,6 +230,8 @@ class SearchResult(Result):
|
|
|
230
230
|
|
|
231
231
|
|
|
232
232
|
class GPTXSearchEngine(Engine):
|
|
233
|
+
MAX_ALLOWED_DOMAINS = 20
|
|
234
|
+
|
|
233
235
|
def __init__(self, api_key: str | None = None, model: str | None = None):
|
|
234
236
|
super().__init__()
|
|
235
237
|
self.config = deepcopy(SYMAI_CONFIG)
|
|
@@ -250,6 +252,65 @@ class GPTXSearchEngine(Engine):
|
|
|
250
252
|
return 'search'
|
|
251
253
|
return super().id() # default to unregistered
|
|
252
254
|
|
|
255
|
+
def _extract_netloc(self, raw_domain: str | None) -> str | None:
|
|
256
|
+
if not isinstance(raw_domain, str):
|
|
257
|
+
return None
|
|
258
|
+
candidate = raw_domain.strip()
|
|
259
|
+
if not candidate:
|
|
260
|
+
return None
|
|
261
|
+
parsed = urlsplit(candidate if '://' in candidate else f"//{candidate}")
|
|
262
|
+
netloc = parsed.netloc or parsed.path
|
|
263
|
+
if not netloc:
|
|
264
|
+
return None
|
|
265
|
+
if '@' in netloc:
|
|
266
|
+
netloc = netloc.split('@', 1)[1]
|
|
267
|
+
if ':' in netloc:
|
|
268
|
+
netloc = netloc.split(':', 1)[0]
|
|
269
|
+
netloc = netloc.strip('.').strip()
|
|
270
|
+
if not netloc:
|
|
271
|
+
return None
|
|
272
|
+
return netloc.lower()
|
|
273
|
+
|
|
274
|
+
def _normalize_allowed_domains(self, domains: list[str] | None) -> list[str]:
|
|
275
|
+
if not domains or not isinstance(domains, list):
|
|
276
|
+
return []
|
|
277
|
+
|
|
278
|
+
normalized: list[str] = []
|
|
279
|
+
seen: set[str] = set()
|
|
280
|
+
for domain in domains:
|
|
281
|
+
netloc = self._extract_netloc(domain)
|
|
282
|
+
if not netloc or netloc in seen:
|
|
283
|
+
continue
|
|
284
|
+
# Validate that netloc is actually a valid domain
|
|
285
|
+
if not self._is_domain(netloc):
|
|
286
|
+
continue
|
|
287
|
+
normalized.append(netloc)
|
|
288
|
+
seen.add(netloc)
|
|
289
|
+
if len(normalized) >= self.MAX_ALLOWED_DOMAINS:
|
|
290
|
+
break
|
|
291
|
+
return normalized
|
|
292
|
+
|
|
293
|
+
def _is_domain(self, s: str) -> bool:
|
|
294
|
+
_label_re = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?$")
|
|
295
|
+
if not s:
|
|
296
|
+
return False
|
|
297
|
+
host = s.strip().rstrip(".")
|
|
298
|
+
# If the input might be a URL, extract the hostname via urllib:
|
|
299
|
+
if "://" in host or "/" in host or "@" in host:
|
|
300
|
+
host = urlsplit(host if "://" in host else f"//{host}").hostname or ""
|
|
301
|
+
if not host:
|
|
302
|
+
return False
|
|
303
|
+
try:
|
|
304
|
+
host_ascii = host.encode("idna").decode("ascii")
|
|
305
|
+
except Exception:
|
|
306
|
+
return False
|
|
307
|
+
if len(host_ascii) > 253:
|
|
308
|
+
return False
|
|
309
|
+
labels = host_ascii.split(".")
|
|
310
|
+
if len(labels) < 2: # require a dot (reject "google")
|
|
311
|
+
return False
|
|
312
|
+
return all(_label_re.fullmatch(lbl or "") for lbl in labels)
|
|
313
|
+
|
|
253
314
|
def command(self, *args, **kwargs):
|
|
254
315
|
super().command(*args, **kwargs)
|
|
255
316
|
if 'SEARCH_ENGINE_API_KEY' in kwargs:
|
|
@@ -261,20 +322,23 @@ class GPTXSearchEngine(Engine):
|
|
|
261
322
|
messages = argument.prop.prepared_input
|
|
262
323
|
kwargs = argument.kwargs
|
|
263
324
|
|
|
264
|
-
tool_definition = {"type": "
|
|
325
|
+
tool_definition = {"type": "web_search"}
|
|
265
326
|
user_location = kwargs.get('user_location')
|
|
266
327
|
if user_location:
|
|
267
328
|
tool_definition['user_location'] = user_location
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
329
|
+
|
|
330
|
+
allowed_domains = self._normalize_allowed_domains(kwargs.get('allowed_domains'))
|
|
331
|
+
if allowed_domains:
|
|
332
|
+
tool_definition['filters'] = {
|
|
333
|
+
'allowed_domains': allowed_domains
|
|
334
|
+
}
|
|
271
335
|
|
|
272
336
|
self.model = kwargs.get('model', self.model) # Important for MetadataTracker to work correctly
|
|
273
337
|
payload = {
|
|
274
338
|
"model": self.model,
|
|
275
339
|
"input": messages,
|
|
276
340
|
"tools": [tool_definition],
|
|
277
|
-
"tool_choice": {"type": "
|
|
341
|
+
"tool_choice": {"type": "web_search"} if self.model not in OPENAI_REASONING_MODELS else "auto" # force the use of web search tool for non-reasoning models
|
|
278
342
|
}
|
|
279
343
|
|
|
280
344
|
try:
|
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
WARNING: This module implements a naive web scraping engine meant for light
|
|
3
|
+
testing. It does not prevent IP bans, bot detection, or terms-of-service
|
|
4
|
+
violations. Use only where scraping is legally permitted and respect each
|
|
5
|
+
site's robots directives. For production workloads, add robust rate limiting,
|
|
6
|
+
consent handling, rotating proxies/VPNs, and ongoing monitoring to avoid
|
|
7
|
+
service disruption.
|
|
8
|
+
"""
|
|
9
|
+
|
|
1
10
|
import io
|
|
2
11
|
import logging
|
|
3
12
|
import re
|
|
@@ -7,6 +16,7 @@ import requests
|
|
|
7
16
|
import trafilatura
|
|
8
17
|
from bs4 import BeautifulSoup
|
|
9
18
|
from pdfminer.high_level import extract_text
|
|
19
|
+
from requests.structures import CaseInsensitiveDict
|
|
10
20
|
|
|
11
21
|
from ....symbol import Result
|
|
12
22
|
from ...base import Engine
|
|
@@ -14,6 +24,7 @@ from ...base import Engine
|
|
|
14
24
|
logging.getLogger("pdfminer").setLevel(logging.WARNING)
|
|
15
25
|
logging.getLogger("trafilatura").setLevel(logging.WARNING)
|
|
16
26
|
|
|
27
|
+
|
|
17
28
|
class RequestsResult(Result):
|
|
18
29
|
def __init__(self, value, output_format="markdown", **kwargs) -> None:
|
|
19
30
|
super().__init__(value, **kwargs)
|
|
@@ -37,27 +48,54 @@ class RequestsResult(Result):
|
|
|
37
48
|
|
|
38
49
|
|
|
39
50
|
class RequestsEngine(Engine):
|
|
40
|
-
|
|
41
|
-
|
|
51
|
+
"""
|
|
52
|
+
Lightweight HTTP/Playwright fetching pipeline for content extraction.
|
|
53
|
+
|
|
54
|
+
The engine favors clarity over stealth. Helper methods normalize cookie
|
|
55
|
+
metadata before handing it to Playwright so that the headless browser and
|
|
56
|
+
the requests session stay aligned.
|
|
57
|
+
"""
|
|
58
|
+
|
|
42
59
|
COMMON_BYPASS_COOKIES = {
|
|
60
|
+
# Some forums display consent or age gates once if a friendly cookie is set.
|
|
43
61
|
"cookieconsent_status": "allow",
|
|
44
62
|
"accepted_cookies": "yes",
|
|
45
63
|
"age_verified": "1",
|
|
46
64
|
}
|
|
47
|
-
|
|
65
|
+
|
|
66
|
+
DEFAULT_HEADERS = {
|
|
67
|
+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
|
68
|
+
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
69
|
+
"Chrome/120.0.0.0 Safari/537.36",
|
|
70
|
+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
71
|
+
"Accept-Language": "en-US,en;q=0.9",
|
|
72
|
+
"DNT": "1",
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
_SAMESITE_CANONICAL = {
|
|
76
|
+
"strict": "Strict",
|
|
77
|
+
"lax": "Lax",
|
|
78
|
+
"none": "None",
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
def __init__(self, timeout=15, verify_ssl=True, user_agent=None):
|
|
82
|
+
"""
|
|
83
|
+
Args:
|
|
84
|
+
timeout: Seconds to wait for network operations before aborting.
|
|
85
|
+
verify_ssl: Toggle for TLS certificate verification.
|
|
86
|
+
user_agent: Optional override for the default desktop Chrome UA.
|
|
87
|
+
"""
|
|
48
88
|
super().__init__()
|
|
49
89
|
self.timeout = timeout
|
|
50
90
|
self.verify_ssl = verify_ssl
|
|
51
91
|
self.name = self.__class__.__name__
|
|
92
|
+
|
|
93
|
+
headers = dict(self.DEFAULT_HEADERS)
|
|
94
|
+
if user_agent:
|
|
95
|
+
headers["User-Agent"] = user_agent
|
|
96
|
+
|
|
52
97
|
self.session = requests.Session()
|
|
53
|
-
self.session.headers.update(
|
|
54
|
-
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
|
55
|
-
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
56
|
-
"Chrome/120.0.0.0 Safari/537.36",
|
|
57
|
-
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
58
|
-
"Accept-Language": "en-US,en;q=0.9",
|
|
59
|
-
"DNT": "1",
|
|
60
|
-
})
|
|
98
|
+
self.session.headers.update(headers)
|
|
61
99
|
|
|
62
100
|
def _maybe_set_bypass_cookies(self, url: str):
|
|
63
101
|
netloc = urlparse(url).hostname
|
|
@@ -66,6 +104,58 @@ class RequestsEngine(Engine):
|
|
|
66
104
|
for k, v in self.COMMON_BYPASS_COOKIES.items():
|
|
67
105
|
self.session.cookies.set(k, v, domain=netloc)
|
|
68
106
|
|
|
107
|
+
@staticmethod
|
|
108
|
+
def _normalize_http_only(raw_value, key_present):
|
|
109
|
+
"""
|
|
110
|
+
Playwright expects a boolean. Cookie metadata can arrive as strings,
|
|
111
|
+
numbers, or placeholder objects, so normalize defensively.
|
|
112
|
+
"""
|
|
113
|
+
if isinstance(raw_value, bool):
|
|
114
|
+
return raw_value
|
|
115
|
+
if isinstance(raw_value, str):
|
|
116
|
+
normalized = raw_value.strip().lower()
|
|
117
|
+
if normalized in {"false", "0", "no"}:
|
|
118
|
+
return False
|
|
119
|
+
if normalized in {"true", "1", "yes"}:
|
|
120
|
+
return True
|
|
121
|
+
if raw_value is None:
|
|
122
|
+
return key_present
|
|
123
|
+
return bool(raw_value)
|
|
124
|
+
|
|
125
|
+
@classmethod
|
|
126
|
+
def _normalize_same_site(cls, raw_value):
|
|
127
|
+
if raw_value is None:
|
|
128
|
+
return None
|
|
129
|
+
normalized = str(raw_value).strip().lower()
|
|
130
|
+
return cls._SAMESITE_CANONICAL.get(normalized)
|
|
131
|
+
|
|
132
|
+
def _playwright_cookie_payload(self, cookie, hostname):
|
|
133
|
+
"""
|
|
134
|
+
Convert a requests cookie into Playwright-friendly format or return None
|
|
135
|
+
if the cookie does not apply to the hostname.
|
|
136
|
+
"""
|
|
137
|
+
domain = (cookie.domain or hostname).lstrip(".")
|
|
138
|
+
if not hostname.endswith(domain):
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
rest_attrs = {k.lower(): v for k, v in cookie._rest.items()}
|
|
142
|
+
http_only = self._normalize_http_only(rest_attrs.get("httponly"), "httponly" in rest_attrs)
|
|
143
|
+
payload = {
|
|
144
|
+
"name": cookie.name,
|
|
145
|
+
"value": cookie.value,
|
|
146
|
+
"domain": cookie.domain or hostname,
|
|
147
|
+
"path": cookie.path or "/",
|
|
148
|
+
"httpOnly": http_only,
|
|
149
|
+
"secure": cookie.secure,
|
|
150
|
+
}
|
|
151
|
+
if cookie.expires:
|
|
152
|
+
payload["expires"] = cookie.expires
|
|
153
|
+
|
|
154
|
+
same_site = self._normalize_same_site(rest_attrs.get("samesite"))
|
|
155
|
+
if same_site:
|
|
156
|
+
payload["sameSite"] = same_site
|
|
157
|
+
return payload
|
|
158
|
+
|
|
69
159
|
def _follow_meta_refresh(self, resp, timeout=15):
|
|
70
160
|
"""
|
|
71
161
|
Some old forums use <meta http-equiv="refresh" content="0;url=...">
|
|
@@ -90,6 +180,93 @@ class RequestsEngine(Engine):
|
|
|
90
180
|
return resp
|
|
91
181
|
return self.session.get(target, timeout=timeout, allow_redirects=True)
|
|
92
182
|
|
|
183
|
+
def _fetch_with_playwright(self, url: str, wait_selector: str = None, wait_until: str = "networkidle", timeout: float = None):
|
|
184
|
+
"""
|
|
185
|
+
Render the target URL in a headless browser to execute JavaScript and
|
|
186
|
+
return a synthetic ``requests.Response`` object to keep downstream
|
|
187
|
+
processing consistent with the non-JS path.
|
|
188
|
+
"""
|
|
189
|
+
try:
|
|
190
|
+
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
|
|
191
|
+
logging.getLogger("playwright").setLevel(logging.WARNING)
|
|
192
|
+
except ImportError as exc:
|
|
193
|
+
raise RuntimeError(
|
|
194
|
+
"Playwright is not installed. Install symbolicai[webscraping] with Playwright extras to enable render_js."
|
|
195
|
+
) from exc
|
|
196
|
+
|
|
197
|
+
timeout_seconds = timeout if timeout is not None else self.timeout
|
|
198
|
+
timeout_ms = max(int(timeout_seconds * 1000), 0)
|
|
199
|
+
user_agent = self.session.headers.get("User-Agent")
|
|
200
|
+
|
|
201
|
+
parsed = urlparse(url)
|
|
202
|
+
hostname = parsed.hostname or ""
|
|
203
|
+
cookie_payload = []
|
|
204
|
+
if hostname:
|
|
205
|
+
for cookie in self.session.cookies:
|
|
206
|
+
payload = self._playwright_cookie_payload(cookie, hostname)
|
|
207
|
+
if payload:
|
|
208
|
+
cookie_payload.append(payload)
|
|
209
|
+
|
|
210
|
+
content = ""
|
|
211
|
+
final_url = url
|
|
212
|
+
status = 200
|
|
213
|
+
headers = CaseInsensitiveDict()
|
|
214
|
+
|
|
215
|
+
with sync_playwright() as playwright:
|
|
216
|
+
browser = playwright.chromium.launch(headless=True)
|
|
217
|
+
context = browser.new_context(
|
|
218
|
+
user_agent=user_agent,
|
|
219
|
+
java_script_enabled=True,
|
|
220
|
+
ignore_https_errors=not self.verify_ssl,
|
|
221
|
+
)
|
|
222
|
+
if cookie_payload:
|
|
223
|
+
context.add_cookies(cookie_payload)
|
|
224
|
+
page = context.new_page()
|
|
225
|
+
|
|
226
|
+
navigation_error = None
|
|
227
|
+
response = None
|
|
228
|
+
try:
|
|
229
|
+
try:
|
|
230
|
+
response = page.goto(url, wait_until=wait_until, timeout=timeout_ms)
|
|
231
|
+
if wait_selector:
|
|
232
|
+
page.wait_for_selector(wait_selector, timeout=timeout_ms)
|
|
233
|
+
except PlaywrightTimeoutError as exc:
|
|
234
|
+
navigation_error = exc
|
|
235
|
+
|
|
236
|
+
try:
|
|
237
|
+
content = page.content()
|
|
238
|
+
except Exception:
|
|
239
|
+
content = ""
|
|
240
|
+
|
|
241
|
+
# Always persist Playwright cookies back into the requests session.
|
|
242
|
+
for cookie in context.cookies():
|
|
243
|
+
self.session.cookies.set(
|
|
244
|
+
cookie["name"],
|
|
245
|
+
cookie["value"],
|
|
246
|
+
domain=cookie.get("domain"),
|
|
247
|
+
path=cookie.get("path", "/"),
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
final_url = page.url
|
|
251
|
+
status = response.status if response is not None else 200
|
|
252
|
+
headers = CaseInsensitiveDict(response.headers if response is not None else {})
|
|
253
|
+
if "content-type" not in headers:
|
|
254
|
+
headers["Content-Type"] = "text/html; charset=utf-8"
|
|
255
|
+
|
|
256
|
+
if navigation_error and not content:
|
|
257
|
+
raise requests.exceptions.Timeout(f"Playwright timed out while rendering {url}") from navigation_error
|
|
258
|
+
finally:
|
|
259
|
+
context.close()
|
|
260
|
+
browser.close()
|
|
261
|
+
|
|
262
|
+
rendered_response = requests.Response()
|
|
263
|
+
rendered_response.status_code = status
|
|
264
|
+
rendered_response._content = content.encode("utf-8", errors="replace")
|
|
265
|
+
rendered_response.url = final_url
|
|
266
|
+
rendered_response.headers = headers
|
|
267
|
+
rendered_response.encoding = "utf-8"
|
|
268
|
+
return rendered_response
|
|
269
|
+
|
|
93
270
|
def id(self) -> str:
|
|
94
271
|
return 'webscraping'
|
|
95
272
|
|
|
@@ -111,7 +288,21 @@ class RequestsEngine(Engine):
|
|
|
111
288
|
if k.lower() not in {"utm_source", "utm_medium", "utm_campaign"}]
|
|
112
289
|
clean_url = urlunparse(parsed._replace(query=urlencode(qs)))
|
|
113
290
|
|
|
114
|
-
|
|
291
|
+
render_js = kwargs.get("render_js")
|
|
292
|
+
render_wait_selector = kwargs.get("render_wait_selector")
|
|
293
|
+
render_wait_until = kwargs.get("render_wait_until", "networkidle")
|
|
294
|
+
render_timeout = kwargs.get("render_timeout")
|
|
295
|
+
|
|
296
|
+
# Prefer fast requests path unless the caller opts into JS rendering.
|
|
297
|
+
if render_js:
|
|
298
|
+
resp = self._fetch_with_playwright(
|
|
299
|
+
clean_url,
|
|
300
|
+
wait_selector=render_wait_selector,
|
|
301
|
+
wait_until=render_wait_until,
|
|
302
|
+
timeout=render_timeout,
|
|
303
|
+
)
|
|
304
|
+
else:
|
|
305
|
+
resp = self.session.get(clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl)
|
|
115
306
|
resp.raise_for_status()
|
|
116
307
|
|
|
117
308
|
# Follow a legacy meta refresh once (do AFTER normal HTTP redirects)
|
|
@@ -120,7 +311,11 @@ class RequestsEngine(Engine):
|
|
|
120
311
|
resp2.raise_for_status()
|
|
121
312
|
resp = resp2
|
|
122
313
|
|
|
123
|
-
metadata = {
|
|
314
|
+
metadata = {
|
|
315
|
+
"response_source": "playwright" if render_js else "requests",
|
|
316
|
+
"render_js": bool(render_js),
|
|
317
|
+
"final_url": resp.url,
|
|
318
|
+
}
|
|
124
319
|
result = RequestsResult(resp, output_format)
|
|
125
320
|
return [result], metadata
|
|
126
321
|
|
symai/backend/mixin/anthropic.py
CHANGED
|
@@ -14,6 +14,8 @@ SUPPORTED_REASONING_MODELS = [
|
|
|
14
14
|
"claude-opus-4-0",
|
|
15
15
|
"claude-sonnet-4-0",
|
|
16
16
|
'claude-3-7-sonnet-latest',
|
|
17
|
+
'claude-haiku-4-5',
|
|
18
|
+
'claude-sonnet-4-5',
|
|
17
19
|
]
|
|
18
20
|
|
|
19
21
|
class AnthropicMixin:
|
|
@@ -22,6 +24,8 @@ class AnthropicMixin:
|
|
|
22
24
|
self.model == 'claude-opus-4-0' or \
|
|
23
25
|
self.model == 'claude-sonnet-4-0' or \
|
|
24
26
|
self.model == 'claude-3-7-sonnet-latest' or \
|
|
27
|
+
self.model == 'claude-haiku-4-5' or \
|
|
28
|
+
self.model == 'claude-sonnet-4-5' or \
|
|
25
29
|
self.model == 'claude-3-5-sonnet-latest' or \
|
|
26
30
|
self.model == 'claude-3-5-sonnet-20241022' or \
|
|
27
31
|
self.model == 'claude-3-5-sonnet-20240620' or \
|
|
@@ -33,7 +37,9 @@ class AnthropicMixin:
|
|
|
33
37
|
|
|
34
38
|
def api_max_response_tokens(self):
|
|
35
39
|
if self.model == 'claude-sonnet-4-0' or \
|
|
36
|
-
self.model == 'claude-3-7-sonnet-latest'
|
|
40
|
+
self.model == 'claude-3-7-sonnet-latest' or \
|
|
41
|
+
self.model == 'claude-haiku-4-5' or \
|
|
42
|
+
self.model == 'claude-sonnet-4-5':
|
|
37
43
|
return 64_000
|
|
38
44
|
if self.model == 'claude-opus-4-1' or \
|
|
39
45
|
self.model == 'claude-opus-4-0':
|
|
@@ -10,6 +10,8 @@ class naive_webscraping(Expression):
|
|
|
10
10
|
|
|
11
11
|
def __call__(self, url: str, **kwargs) -> RequestsResult:
|
|
12
12
|
@core.scrape(url=url, **kwargs)
|
|
13
|
-
def _func(_) -> RequestsResult:
|
|
14
|
-
|
|
13
|
+
def _func(_, *args, **inner_kwargs) -> RequestsResult:
|
|
14
|
+
# The fallback path may inject debugging kwargs like `error`/`stack_trace`;
|
|
15
|
+
# accept and ignore them so EngineRepository can surface structured failures.
|
|
16
|
+
return None
|
|
15
17
|
return _func(self)
|
symai/menu/screen.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
1
3
|
from prompt_toolkit import print_formatted_text
|
|
2
4
|
|
|
3
5
|
from ..misc.console import ConsoleStyle
|
|
@@ -33,12 +35,13 @@ def show_separator(print: callable = print_formatted_text):
|
|
|
33
35
|
|
|
34
36
|
|
|
35
37
|
def show_intro_menu():
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
38
|
+
if os.environ.get('SYMAI_WARNINGS', '1') == '1':
|
|
39
|
+
with ConsoleStyle('extensity') as console:
|
|
40
|
+
show_splash_screen(print=console.print)
|
|
41
|
+
with ConsoleStyle('text') as console:
|
|
42
|
+
show_info_message(print=console.print)
|
|
43
|
+
with ConsoleStyle('extensity') as console:
|
|
44
|
+
show_separator(print=console.print)
|
|
42
45
|
|
|
43
46
|
if __name__ == '__main__':
|
|
44
47
|
show_intro_menu()
|
|
@@ -1,8 +1,37 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: symbolicai
|
|
3
|
-
Version: 0.19.0
|
|
3
|
+
Version: 0.20.1
|
|
4
4
|
Summary: A Neurosymbolic Perspective on Large Language Models
|
|
5
5
|
Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
|
|
6
|
+
License: BSD 3-Clause License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025, ExtensityAI FlexCo
|
|
9
|
+
|
|
10
|
+
Redistribution and use in source and binary forms, with or without
|
|
11
|
+
modification, are permitted provided that the following conditions are met:
|
|
12
|
+
|
|
13
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
14
|
+
list of conditions and the following disclaimer.
|
|
15
|
+
|
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
17
|
+
this list of conditions and the following disclaimer in the documentation
|
|
18
|
+
and/or other materials provided with the distribution.
|
|
19
|
+
|
|
20
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
21
|
+
contributors may be used to endorse or promote products derived from
|
|
22
|
+
this software without specific prior written permission.
|
|
23
|
+
|
|
24
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
25
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
26
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
27
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
28
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
29
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
30
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
31
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
32
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
33
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
34
|
+
|
|
6
35
|
Project-URL: Homepage, https://extensity.ai
|
|
7
36
|
Project-URL: GitHub, https://github.com/ExtensityAI/symbolicai
|
|
8
37
|
Keywords: probabilistic programming,machine learning
|
|
@@ -11,6 +40,7 @@ Classifier: License :: OSI Approved :: BSD License
|
|
|
11
40
|
Classifier: Operating System :: OS Independent
|
|
12
41
|
Requires-Python: >=3.10
|
|
13
42
|
Description-Content-Type: text/markdown
|
|
43
|
+
License-File: LICENSE
|
|
14
44
|
Requires-Dist: attrs>=23.2.0
|
|
15
45
|
Requires-Dist: setuptools>=70.0.0
|
|
16
46
|
Requires-Dist: toml>=0.10.2
|
|
@@ -53,6 +83,7 @@ Requires-Dist: pycryptodome>=3.20.0
|
|
|
53
83
|
Requires-Dist: httpx>=0.27.2
|
|
54
84
|
Requires-Dist: nest-asyncio>=1.6.0
|
|
55
85
|
Requires-Dist: rich>=13.9.4
|
|
86
|
+
Requires-Dist: playwright>=1.55.0
|
|
56
87
|
Provides-Extra: bitsandbytes
|
|
57
88
|
Requires-Dist: bitsandbytes>=0.43.1; extra == "bitsandbytes"
|
|
58
89
|
Provides-Extra: blip2
|
|
@@ -69,6 +100,7 @@ Provides-Extra: webscraping
|
|
|
69
100
|
Requires-Dist: beautifulsoup4>=4.12.3; extra == "webscraping"
|
|
70
101
|
Requires-Dist: trafilatura>=2.0.0; extra == "webscraping"
|
|
71
102
|
Requires-Dist: pdfminer.six; extra == "webscraping"
|
|
103
|
+
Requires-Dist: playwright>=1.45.0; extra == "webscraping"
|
|
72
104
|
Provides-Extra: llama-cpp
|
|
73
105
|
Requires-Dist: llama-cpp-python[server]>=0.3.7; extra == "llama-cpp"
|
|
74
106
|
Provides-Extra: wolframalpha
|
|
@@ -92,6 +124,7 @@ Requires-Dist: symbolicai[webscraping]; extra == "all"
|
|
|
92
124
|
Requires-Dist: symbolicai[serpapi]; extra == "all"
|
|
93
125
|
Requires-Dist: symbolicai[services]; extra == "all"
|
|
94
126
|
Requires-Dist: symbolicai[solver]; extra == "all"
|
|
127
|
+
Dynamic: license-file
|
|
95
128
|
|
|
96
129
|
# **SymbolicAI: A neuro-symbolic perspective on LLMs**
|
|
97
130
|
<img src="https://raw.githubusercontent.com/ExtensityAI/symbolicai/refs/heads/main/assets/images/banner.png">
|
|
@@ -420,7 +453,7 @@ Now, there are tools like DeepWiki that provide better documentation than we cou
|
|
|
420
453
|
|
|
421
454
|
## 📝 License
|
|
422
455
|
|
|
423
|
-
This project is licensed under the BSD-3-Clause License
|
|
456
|
+
This project is licensed under the BSD-3-Clause License.
|
|
424
457
|
|
|
425
458
|
## Like this Project?
|
|
426
459
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
symai/TERMS_OF_SERVICE.md,sha256=HN42UXVI_wAVDHjMShzy_k7xAsbjXaATNeMKcIte_eg,91409
|
|
2
|
-
symai/__init__.py,sha256=
|
|
2
|
+
symai/__init__.py,sha256=fsXUaTEBN_pJJ24Fh59ZPC39HPMGEH_w7Qr7Llwpyhk,16464
|
|
3
3
|
symai/chat.py,sha256=vqEe7NqSWdzr9ixkko_094SR1LIbgPLcZxQ8W7782N4,12775
|
|
4
4
|
symai/components.py,sha256=vgIq-cC8rqZG9PAPUB52Y5RGFEKrxFUCWzqzrPzLNvw,52232
|
|
5
5
|
symai/constraints.py,sha256=S1ywLB8nFQy4-beDoJz6IvLTiZHGR8Fu5RNTY4v5zG0,1641
|
|
@@ -44,8 +44,8 @@ symai/backend/engines/index/engine_pinecone.py,sha256=Horf9lzw2QLMdYvvBLeAu3MOyi
|
|
|
44
44
|
symai/backend/engines/index/engine_vectordb.py,sha256=q0jUvmAh1AEPIE5fpayjSmSg-U9W1OffDXkx4tkFvNI,8048
|
|
45
45
|
symai/backend/engines/lean/engine_lean4.py,sha256=1ZZOzw1kDAJH6hMCyDtK50LAYSsoU8pApWVPI_9Eul0,9440
|
|
46
46
|
symai/backend/engines/neurosymbolic/__init__.py,sha256=mZpsX-UVc86nYjn1hxyJM1AvVq5BvZK8sr5PU-QgwVU,1438
|
|
47
|
-
symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py,sha256=
|
|
48
|
-
symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py,sha256=
|
|
47
|
+
symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py,sha256=CyJei8fJvmb9hsHMh5mqWFISnkqM5b0Bln-bh45NWjE,18805
|
|
48
|
+
symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py,sha256=hPCJxZH25HWuYRpNE6XpjC8fMZwPxCObWCfb-UyJYs8,19844
|
|
49
49
|
symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py,sha256=ZaZvAVpgv5GYjE2yZmYhP5zUnEKak5I1mndRfiGfr6U,8995
|
|
50
50
|
symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py,sha256=kxGWYsxnQkpsm40HB1MUGggWmrWz8avCY3jvNrohaKw,25622
|
|
51
51
|
symai/backend/engines/neurosymbolic/engine_groq.py,sha256=CsPd2TOHgd-gAIRJvFbTTy_otUvMdDxJPH7DsfZAooM,12176
|
|
@@ -56,7 +56,7 @@ symai/backend/engines/neurosymbolic/engine_openai_gptX_completion.py,sha256=YgxR
|
|
|
56
56
|
symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py,sha256=QVbyZybUPSAQHiA66V6we2W2dAsk52g1kJ7kMdGqb9I,22951
|
|
57
57
|
symai/backend/engines/ocr/engine_apilayer.py,sha256=hZo4lk0ECRIzaGEpmCSNjR5Xrh8mwkKMD2ddpdgioVU,2399
|
|
58
58
|
symai/backend/engines/output/engine_stdout.py,sha256=2hhyhMHFJTfjVRaODYd_5XPnV9pT03URcpYbeMY_USU,951
|
|
59
|
-
symai/backend/engines/search/engine_openai.py,sha256=
|
|
59
|
+
symai/backend/engines/search/engine_openai.py,sha256=s0ZlcIRDGrzm24M9uhPyztF1w02An2tt3XcoGaGx6RQ,13951
|
|
60
60
|
symai/backend/engines/search/engine_perplexity.py,sha256=yxuhGaA38d1FRbLv6piLll0QDxCCyBVK6eeomjYNryM,4157
|
|
61
61
|
symai/backend/engines/search/engine_serpapi.py,sha256=UqvGHs1J9BOv05C0FJUQjbz29_VuWncIkeDwlRPUilU,3698
|
|
62
62
|
symai/backend/engines/speech_to_text/engine_local_whisper.py,sha256=LRsXliCpHDFPFaE-vPky3-DLkmYwmwe2mxfF0Brz4Wg,8220
|
|
@@ -64,9 +64,9 @@ symai/backend/engines/symbolic/engine_wolframalpha.py,sha256=JNKASrNEDxPx17VRUAJ
|
|
|
64
64
|
symai/backend/engines/text_to_speech/engine_openai.py,sha256=rq34pTr4bRU-HeA84AvGEcTOL6Kpu3fZqmLg3Qo3QvU,2150
|
|
65
65
|
symai/backend/engines/text_vision/engine_clip.py,sha256=EUwlom2e7m_efCK2zuPbe1TzyT9CPRlY0mkFTCmXp0U,3740
|
|
66
66
|
symai/backend/engines/userinput/engine_console.py,sha256=FwOakooxCc4oaQv6nYd-uIG2SxJRUI3n64cIs3B82FY,770
|
|
67
|
-
symai/backend/engines/webscraping/engine_requests.py,sha256=
|
|
67
|
+
symai/backend/engines/webscraping/engine_requests.py,sha256=SXUdmWNwMAZpnIseKAPmqUbVdykXNJ3JQ1PssxRnMvE,12421
|
|
68
68
|
symai/backend/mixin/__init__.py,sha256=ischewsMtIFanU30N32ac2Eb8u4hjWxuEb6mrniUv6Y,702
|
|
69
|
-
symai/backend/mixin/anthropic.py,sha256=
|
|
69
|
+
symai/backend/mixin/anthropic.py,sha256=EDDgzQIesVVU0JOTQsuMzLf2IrAXXeQ3NfBH0w3KAJQ,2183
|
|
70
70
|
symai/backend/mixin/deepseek.py,sha256=U-xtUjR9dFTkmiJPAF5_tyuTxpnUxv5gki9WjTfrVL4,379
|
|
71
71
|
symai/backend/mixin/google.py,sha256=aCQDxo_F0_mQGb8h2iYhQmOlo7NuF2IhY85CYro-m4k,453
|
|
72
72
|
symai/backend/mixin/groq.py,sha256=ZXbJcAMR6mHiA5FfnpWivb0l71cgNR-5pYRtWNe8Nmc,232
|
|
@@ -105,7 +105,7 @@ symai/extended/interfaces/gpt_image.py,sha256=dqhgAv2NS7vZrhXK_XBGQ7cDx-MNn5SBwR
|
|
|
105
105
|
symai/extended/interfaces/input.py,sha256=oEPoZeiZLtGZE3rgAapp3SfhCt45JjvRHlfzTg63Axk,447
|
|
106
106
|
symai/extended/interfaces/llava.py,sha256=7o82-Rf1md2yG3kF-SARzPJfq3pDD2ne-28Jc1Zkr90,457
|
|
107
107
|
symai/extended/interfaces/naive_vectordb.py,sha256=eaDGThzZ5qKuM9JiYer6qoFpRMS_lunr9zqxQORGnQc,1403
|
|
108
|
-
symai/extended/interfaces/naive_webscraping.py,sha256=
|
|
108
|
+
symai/extended/interfaces/naive_webscraping.py,sha256=AObpIw1ZYFpJHasjciJLCP1JjshkBkIzez3AN2e-mmo,689
|
|
109
109
|
symai/extended/interfaces/ocr.py,sha256=hmxS0DWEdJ69O0H6CUWKG0O7WwQBa-VFqrwS29dGtc8,561
|
|
110
110
|
symai/extended/interfaces/openai_search.py,sha256=znJiSyS7KXFxRmbPxArM47a5jyg7byAA4cgVwfvegtw,526
|
|
111
111
|
symai/extended/interfaces/perplexity.py,sha256=eKovuqV5bbWZA0Y8kPzHOcnYXjUGveO3CB6bD7FSPuc,527
|
|
@@ -141,7 +141,7 @@ symai/formatter/emoji.pytxt,sha256=D7FMnAar7UMBZnVoPIa_drnv8thIM8r0KNq_BkQ0k8k,1
|
|
|
141
141
|
symai/formatter/formatter.py,sha256=Od109Rx5E7b_O6mod-6HXq4mkNwg-E5wQr52zBya68w,8386
|
|
142
142
|
symai/formatter/regex.py,sha256=POf4anhw2FovCQinq3yFWGNcWXf3diIV8yVrp9adieA,9924
|
|
143
143
|
symai/menu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
144
|
-
symai/menu/screen.py,sha256=
|
|
144
|
+
symai/menu/screen.py,sha256=x0d2wH1KyjyOJFmv0pdGpolVGol1ItwMJvANjm8GhFk,1846
|
|
145
145
|
symai/misc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
146
146
|
symai/misc/console.py,sha256=hwAET_vwX7bwxmrQ6lgUbZhv3ME5_4vMtOWeCAHg5no,3052
|
|
147
147
|
symai/misc/loader.py,sha256=7lyIMIvU6Ywo_xt-TM8Xqhc6W4tY67U5XzxSkuAYZi8,1635
|
|
@@ -154,8 +154,9 @@ symai/ops/primitives.py,sha256=EaB2Ekx9yGNDaQa3aKS5KpuEr5awAUbO3OcBbufI-l4,11072
|
|
|
154
154
|
symai/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
155
155
|
symai/server/huggingface_server.py,sha256=UpSBflnQaenDjY1AAn5LUYeg5J4gJLWiMuC5DcoIV3E,8743
|
|
156
156
|
symai/server/llama_cpp_server.py,sha256=qVCldTdcQhK2YCu7sDNSYziu1p2AQieqMFfY028-yOc,2049
|
|
157
|
-
symbolicai-0.
|
|
158
|
-
symbolicai-0.
|
|
159
|
-
symbolicai-0.
|
|
160
|
-
symbolicai-0.
|
|
161
|
-
symbolicai-0.
|
|
157
|
+
symbolicai-0.20.1.dist-info/licenses/LICENSE,sha256=9vRFudlJ1ghVfra5lcCUIYQCqnZSYcBLjLHbGRsrQCs,1505
|
|
158
|
+
symbolicai-0.20.1.dist-info/METADATA,sha256=Lg6BX-_4lfl9afWX09E9uuvyenffX8Y4Qwkb5zh_ZKc,23122
|
|
159
|
+
symbolicai-0.20.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
160
|
+
symbolicai-0.20.1.dist-info/entry_points.txt,sha256=JV5sdydIfUZdDF6QBEQHiZHod6XNPjCjpWQrXh7gTAw,261
|
|
161
|
+
symbolicai-0.20.1.dist-info/top_level.txt,sha256=bOoIDfpDIvCQtQgXcwVKJvxAKwsxpxo2IL4z92rNJjw,6
|
|
162
|
+
symbolicai-0.20.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, ExtensityAI FlexCo
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|