symbolicai 0.21.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. symai/__init__.py +269 -173
  2. symai/backend/base.py +123 -110
  3. symai/backend/engines/drawing/engine_bfl.py +45 -44
  4. symai/backend/engines/drawing/engine_gpt_image.py +112 -97
  5. symai/backend/engines/embedding/engine_llama_cpp.py +63 -52
  6. symai/backend/engines/embedding/engine_openai.py +25 -21
  7. symai/backend/engines/execute/engine_python.py +19 -18
  8. symai/backend/engines/files/engine_io.py +104 -95
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +28 -24
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +102 -79
  11. symai/backend/engines/index/engine_pinecone.py +124 -97
  12. symai/backend/engines/index/engine_qdrant.py +1011 -0
  13. symai/backend/engines/index/engine_vectordb.py +84 -56
  14. symai/backend/engines/lean/engine_lean4.py +96 -52
  15. symai/backend/engines/neurosymbolic/__init__.py +41 -13
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +330 -248
  17. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +329 -264
  18. symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
  19. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +118 -88
  20. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +344 -299
  21. symai/backend/engines/neurosymbolic/engine_groq.py +173 -115
  22. symai/backend/engines/neurosymbolic/engine_huggingface.py +114 -84
  23. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +144 -118
  24. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +415 -307
  25. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +394 -231
  26. symai/backend/engines/ocr/engine_apilayer.py +23 -27
  27. symai/backend/engines/output/engine_stdout.py +10 -13
  28. symai/backend/engines/{webscraping → scrape}/engine_requests.py +101 -54
  29. symai/backend/engines/search/engine_openai.py +100 -88
  30. symai/backend/engines/search/engine_parallel.py +665 -0
  31. symai/backend/engines/search/engine_perplexity.py +44 -45
  32. symai/backend/engines/search/engine_serpapi.py +37 -34
  33. symai/backend/engines/speech_to_text/engine_local_whisper.py +54 -51
  34. symai/backend/engines/symbolic/engine_wolframalpha.py +15 -9
  35. symai/backend/engines/text_to_speech/engine_openai.py +20 -26
  36. symai/backend/engines/text_vision/engine_clip.py +39 -37
  37. symai/backend/engines/userinput/engine_console.py +5 -6
  38. symai/backend/mixin/__init__.py +13 -0
  39. symai/backend/mixin/anthropic.py +48 -38
  40. symai/backend/mixin/deepseek.py +6 -5
  41. symai/backend/mixin/google.py +7 -4
  42. symai/backend/mixin/groq.py +2 -4
  43. symai/backend/mixin/openai.py +140 -110
  44. symai/backend/settings.py +87 -20
  45. symai/chat.py +216 -123
  46. symai/collect/__init__.py +7 -1
  47. symai/collect/dynamic.py +80 -70
  48. symai/collect/pipeline.py +67 -51
  49. symai/collect/stats.py +161 -109
  50. symai/components.py +707 -360
  51. symai/constraints.py +24 -12
  52. symai/core.py +1857 -1233
  53. symai/core_ext.py +83 -80
  54. symai/endpoints/api.py +166 -104
  55. symai/extended/.DS_Store +0 -0
  56. symai/extended/__init__.py +46 -12
  57. symai/extended/api_builder.py +29 -21
  58. symai/extended/arxiv_pdf_parser.py +23 -14
  59. symai/extended/bibtex_parser.py +9 -6
  60. symai/extended/conversation.py +156 -126
  61. symai/extended/document.py +50 -30
  62. symai/extended/file_merger.py +57 -14
  63. symai/extended/graph.py +51 -32
  64. symai/extended/html_style_template.py +18 -14
  65. symai/extended/interfaces/blip_2.py +2 -3
  66. symai/extended/interfaces/clip.py +4 -3
  67. symai/extended/interfaces/console.py +9 -1
  68. symai/extended/interfaces/dall_e.py +4 -2
  69. symai/extended/interfaces/file.py +2 -0
  70. symai/extended/interfaces/flux.py +4 -2
  71. symai/extended/interfaces/gpt_image.py +16 -7
  72. symai/extended/interfaces/input.py +2 -1
  73. symai/extended/interfaces/llava.py +1 -2
  74. symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +4 -3
  75. symai/extended/interfaces/naive_vectordb.py +9 -10
  76. symai/extended/interfaces/ocr.py +5 -3
  77. symai/extended/interfaces/openai_search.py +2 -0
  78. symai/extended/interfaces/parallel.py +30 -0
  79. symai/extended/interfaces/perplexity.py +2 -0
  80. symai/extended/interfaces/pinecone.py +12 -9
  81. symai/extended/interfaces/python.py +2 -0
  82. symai/extended/interfaces/serpapi.py +3 -1
  83. symai/extended/interfaces/terminal.py +2 -4
  84. symai/extended/interfaces/tts.py +3 -2
  85. symai/extended/interfaces/whisper.py +3 -2
  86. symai/extended/interfaces/wolframalpha.py +2 -1
  87. symai/extended/metrics/__init__.py +11 -1
  88. symai/extended/metrics/similarity.py +14 -13
  89. symai/extended/os_command.py +39 -29
  90. symai/extended/packages/__init__.py +29 -3
  91. symai/extended/packages/symdev.py +51 -43
  92. symai/extended/packages/sympkg.py +41 -35
  93. symai/extended/packages/symrun.py +63 -50
  94. symai/extended/repo_cloner.py +14 -12
  95. symai/extended/seo_query_optimizer.py +15 -13
  96. symai/extended/solver.py +116 -91
  97. symai/extended/summarizer.py +12 -10
  98. symai/extended/taypan_interpreter.py +17 -18
  99. symai/extended/vectordb.py +122 -92
  100. symai/formatter/__init__.py +9 -1
  101. symai/formatter/formatter.py +51 -47
  102. symai/formatter/regex.py +70 -69
  103. symai/functional.py +325 -176
  104. symai/imports.py +190 -147
  105. symai/interfaces.py +57 -28
  106. symai/memory.py +45 -35
  107. symai/menu/screen.py +28 -19
  108. symai/misc/console.py +66 -56
  109. symai/misc/loader.py +8 -5
  110. symai/models/__init__.py +17 -1
  111. symai/models/base.py +395 -236
  112. symai/models/errors.py +1 -2
  113. symai/ops/__init__.py +32 -22
  114. symai/ops/measures.py +24 -25
  115. symai/ops/primitives.py +1149 -731
  116. symai/post_processors.py +58 -50
  117. symai/pre_processors.py +86 -82
  118. symai/processor.py +21 -13
  119. symai/prompts.py +764 -685
  120. symai/server/huggingface_server.py +135 -49
  121. symai/server/llama_cpp_server.py +21 -11
  122. symai/server/qdrant_server.py +206 -0
  123. symai/shell.py +100 -42
  124. symai/shellsv.py +700 -492
  125. symai/strategy.py +630 -346
  126. symai/symbol.py +368 -322
  127. symai/utils.py +100 -78
  128. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +22 -10
  129. symbolicai-1.1.0.dist-info/RECORD +168 -0
  130. symbolicai-0.21.0.dist-info/RECORD +0 -162
  131. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
  132. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
  133. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
  134. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
symai/backend/engines/ocr/engine_apilayer.py

@@ -1,11 +1,10 @@
- import requests
  from pathlib import Path
 
- from typing import Optional
+ import requests
 
+ from ....symbol import Result
  from ...base import Engine
  from ...settings import SYMAI_CONFIG
- from ....symbol import Result
 
 
  class ApiLayerResult(Result):
@@ -14,56 +13,53 @@ class ApiLayerResult(Result):
          self.raw = text
          try:
              dict_ = self._to_symbol(text).ast()
-             self._value = dict_['all_text'] if 'all_text' in dict_ else f'OCR Engine Error: {text} - status code {status_code}'
-         except:
-             self._value = f'OCR Engine Error: {text} - status code {status_code}'
+             self._value = dict_.get(
+                 "all_text", f"OCR Engine Error: {text} - status code {status_code}"
+             )
+         except Exception:
+             self._value = f"OCR Engine Error: {text} - status code {status_code}"
 
 
  class OCREngine(Engine):
-     def __init__(self, api_key: Optional[str] = None):
+     def __init__(self, api_key: str | None = None):
          super().__init__()
          # Opening JSON file
          self.config = SYMAI_CONFIG
-         self.headers = {
-             "apikey": self.config['OCR_ENGINE_API_KEY'] if api_key is None else api_key
-         }
+         self.headers = {"apikey": self.config["OCR_ENGINE_API_KEY"] if api_key is None else api_key}
          self.name = self.__class__.__name__
 
      def id(self) -> str:
-         if self.config['OCR_ENGINE_API_KEY']:
-             return 'ocr'
-         return super().id() # default to unregistered
+         if self.config["OCR_ENGINE_API_KEY"]:
+             return "ocr"
+         return super().id()  # default to unregistered
 
      def command(self, *args, **kwargs):
          super().command(*args, **kwargs)
-         if 'OCR_ENGINE_API_KEY' in kwargs:
-             self.headers = {
-                 "apikey": kwargs['OCR_ENGINE_API_KEY']
-             }
+         if "OCR_ENGINE_API_KEY" in kwargs:
+             self.headers = {"apikey": kwargs["OCR_ENGINE_API_KEY"]}
 
      def forward(self, argument):
-         kwargs = argument.kwargs
          image_url = argument.prop.image
 
          if image_url.startswith("file://"):
              file_path = Path(image_url[7:]).resolve()
-             with open(file_path, "rb") as file:
+             with file_path.open("rb") as file:
                  payload = file.read()
-             url = "https://api.apilayer.com/image_to_text/upload"
+             url = "https://api.apilayer.com/image_to_text/upload"
              response = requests.request("POST", url, headers=self.headers, data=payload)
          else:
-             payload = {}
-             url = f"https://api.apilayer.com/image_to_text/url?url={image_url}"
-             response = requests.request("GET", url, headers=self.headers, data = payload)
+             payload = {}
+             url = f"https://api.apilayer.com/image_to_text/url?url={image_url}"
+             response = requests.request("GET", url, headers=self.headers, data=payload)
 
          status_code = response.status_code
-         rsp = response.text
-         rsp = ApiLayerResult(response.text, status_code)
-         metadata = {}
+         rsp = response.text
+         rsp = ApiLayerResult(response.text, status_code)
+         metadata = {}
 
          return [rsp], metadata
 
      def prepare(self, argument):
          assert not argument.prop.processed_input, "OCREngine does not support processed_input."
-         image = str(argument.prop.image)
+         image = str(argument.prop.image)
          argument.prop.prepared_input = image
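
Note: the error handling in `ApiLayerResult` swaps the `'all_text' in dict_` membership test for `dict.get` with a default, and narrows the bare `except:` (which would also swallow `KeyboardInterrupt` and `SystemExit`) to `except Exception:`. A minimal standalone sketch of the resulting behavior, with a hypothetical `parse` callable standing in for `self._to_symbol(text).ast()`:

    def extract_ocr_text(text: str, status_code: int, parse) -> str:
        fallback = f"OCR Engine Error: {text} - status code {status_code}"
        try:
            parsed = parse(text)  # may raise on malformed payloads
            # The default argument replaces the explicit membership check.
            return parsed.get("all_text", fallback)
        except Exception:  # narrowed from a bare 'except:'
            return fallback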
symai/backend/engines/output/engine_stdout.py

@@ -7,28 +7,25 @@ class OutputEngine(Engine):
          self.name = self.__class__.__name__
 
      def id(self) -> str:
-         return 'output'
+         return "output"
 
      def forward(self, argument):
-         expr, processed, args, kwargs = argument.prop.prepared_input
+         expr, processed, args, kwargs = argument.prop.prepared_input
          res = None
          args = [] if args is None else args
          kwargs = {} if kwargs is None else kwargs
          if expr:
-             if processed:
-                 res = expr(processed, *args, **kwargs)
-             else:
-                 res = expr(*args, **kwargs)
+             res = expr(processed, *args, **kwargs) if processed else expr(*args, **kwargs)
 
          metadata = {}
-         result = {
-             'result': res,
-             'processed': processed,
-             'args': args,
-             'kwargs': kwargs
-         }
+         result = {"result": res, "processed": processed, "args": args, "kwargs": kwargs}
 
          return [result], metadata
 
      def prepare(self, argument):
-         argument.prop.prepared_input = argument.prop.expr, argument.prop.processed_input, argument.prop.args, argument.prop.kwargs
+         argument.prop.prepared_input = (
+             argument.prop.expr,
+             argument.prop.processed_input,
+             argument.prop.args,
+             argument.prop.kwargs,
+         )
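
Note: `forward` collapses the if/else into a conditional expression: when a `processed` value is present it is passed as the first positional argument to `expr`, otherwise `expr` is called with only the stored `args`/`kwargs`. A tiny standalone illustration of that dispatch (the `expr` here is an arbitrary callable, not the engine's):

    def dispatch(expr, processed=None, args=(), kwargs=None):
        kwargs = {} if kwargs is None else kwargs
        return expr(processed, *args, **kwargs) if processed else expr(*args, **kwargs)

    assert dispatch(len, processed="abc") == 3  # called as len("abc")
    assert dispatch(list) == []                 # called as list()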
symai/backend/engines/{webscraping → scrape}/engine_requests.py

@@ -10,6 +10,7 @@ service disruption.
  import io
  import logging
  import re
+ from typing import Any, ClassVar
  from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
 
  import requests
@@ -19,6 +20,7 @@ from pdfminer.high_level import extract_text
  from requests.structures import CaseInsensitiveDict
 
  from ....symbol import Result
+ from ....utils import UserMessage
  from ...base import Engine
 
  logging.getLogger("pdfminer").setLevel(logging.WARNING)
@@ -56,23 +58,23 @@ class RequestsEngine(Engine):
      the requests session stay aligned.
      """
 
-     COMMON_BYPASS_COOKIES = {
+     COMMON_BYPASS_COOKIES: ClassVar[dict[str, str]] = {
          # Some forums display consent or age gates once if a friendly cookie is set.
          "cookieconsent_status": "allow",
          "accepted_cookies": "yes",
          "age_verified": "1",
      }
 
-     DEFAULT_HEADERS = {
+     DEFAULT_HEADERS: ClassVar[dict[str, str]] = {
          "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
-         "AppleWebKit/537.36 (KHTML, like Gecko) "
-         "Chrome/120.0.0.0 Safari/537.36",
+                       "AppleWebKit/537.36 (KHTML, like Gecko) "
+                       "Chrome/120.0.0.0 Safari/537.36",
          "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
          "Accept-Language": "en-US,en;q=0.9",
          "DNT": "1",
      }
 
-     _SAMESITE_CANONICAL = {
+     _SAMESITE_CANONICAL: ClassVar[dict[str, str]] = {
          "strict": "Strict",
          "lax": "Lax",
          "none": "None",
@@ -156,6 +158,58 @@ class RequestsEngine(Engine):
              payload["sameSite"] = same_site
          return payload
 
+     def _collect_playwright_cookies(self, hostname: str) -> list[dict[str, Any]]:
+         if not hostname:
+             return []
+         cookie_payload = []
+         for cookie in self.session.cookies:
+             payload = self._playwright_cookie_payload(cookie, hostname)
+             if payload:
+                 cookie_payload.append(payload)
+         return cookie_payload
+
+     @staticmethod
+     def _add_cookies_to_context(context, cookie_payload: list[dict[str, Any]]) -> None:
+         if cookie_payload:
+             context.add_cookies(cookie_payload)
+
+     @staticmethod
+     def _navigate_playwright_page(
+         page, url: str, wait_selector: str | None, wait_until: str, timeout_ms: int, timeout_error
+     ):
+         try:
+             response = page.goto(url, wait_until=wait_until, timeout=timeout_ms)
+             if wait_selector:
+                 page.wait_for_selector(wait_selector, timeout=timeout_ms)
+             return response, None
+         except timeout_error as exc:
+             return None, exc
+
+     @staticmethod
+     def _safe_page_content(page) -> str:
+         try:
+             return page.content()
+         except Exception:
+             return ""
+
+     def _sync_cookies_from_context(self, context) -> None:
+         for cookie in context.cookies():
+             self.session.cookies.set(
+                 cookie["name"],
+                 cookie["value"],
+                 domain=cookie.get("domain"),
+                 path=cookie.get("path", "/"),
+             )
+
+     @staticmethod
+     def _rendered_response_metadata(page, response):
+         final_url = page.url
+         status = response.status if response is not None else 200
+         headers = CaseInsensitiveDict(response.headers if response is not None else {})
+         if "content-type" not in headers:
+             headers["Content-Type"] = "text/html; charset=utf-8"
+         return final_url, status, headers
+
      def _follow_meta_refresh(self, resp, timeout=15):
          """
          Some old forums use <meta http-equiv="refresh" content="0;url=...">
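
Note: the extracted helpers make the Playwright cookie round trip explicit: `_collect_playwright_cookies` translates the `requests` cookie jar into the list-of-dicts shape that Playwright's `context.add_cookies` expects, and `_sync_cookies_from_context` writes the browser's cookies back into the session. A rough standalone sketch of that round trip (the domain and cookie values are hypothetical):

    import requests

    session = requests.Session()
    session.cookies.set("age_verified", "1", domain="forum.example.com", path="/")

    # requests -> Playwright: a list of plain dicts.
    payload = [
        {"name": c.name, "value": c.value, "domain": c.domain, "path": c.path or "/"}
        for c in session.cookies
    ]

    # Playwright -> requests: mirrors _sync_cookies_from_context above.
    for cookie in [{"name": "sid", "value": "abc123", "domain": "forum.example.com", "path": "/"}]:
        session.cookies.set(
            cookie["name"], cookie["value"],
            domain=cookie.get("domain"), path=cookie.get("path", "/"),
        )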
@@ -180,19 +234,28 @@
              return resp
          return self.session.get(target, timeout=timeout, allow_redirects=True)
 
-     def _fetch_with_playwright(self, url: str, wait_selector: str = None, wait_until: str = "networkidle", timeout: float = None):
+     def _fetch_with_playwright(
+         self,
+         url: str,
+         wait_selector: str | None = None,
+         wait_until: str = "networkidle",
+         timeout: float | None = None,
+     ):
          """
          Render the target URL in a headless browser to execute JavaScript and
          return a synthetic ``requests.Response`` object to keep downstream
          processing consistent with the non-JS path.
          """
          try:
-             from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
+             # Playwright is optional; import only when JS rendering is requested.
+             from playwright.sync_api import TimeoutError as PlaywrightTimeoutError  # noqa
+             from playwright.sync_api import sync_playwright  # noqa
+
              logging.getLogger("playwright").setLevel(logging.WARNING)
          except ImportError as exc:
-             raise RuntimeError(
-                 "Playwright is not installed. Install symbolicai[webscraping] with Playwright extras to enable render_js."
-             ) from exc
+             msg = "Playwright is not installed. Install symbolicai[scrape] with Playwright extras to enable render_js."
+             UserMessage(msg)
+             raise RuntimeError(msg) from exc
 
          timeout_seconds = timeout if timeout is not None else self.timeout
          timeout_ms = max(int(timeout_seconds * 1000), 0)
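
Note: the import guard now reports the failure through `UserMessage` before raising, and the extras name tracks the `webscraping` → `scrape` rename. The underlying pattern is a lazy optional-dependency import; a minimal sketch of the same idea, standalone and without the `UserMessage` side channel:

    def require_playwright():
        """Defer the Playwright import so the engine stays usable without it
        until JavaScript rendering is actually requested."""
        try:
            from playwright.sync_api import sync_playwright
        except ImportError as exc:
            msg = ("Playwright is not installed. Install symbolicai[scrape] "
                   "with Playwright extras to enable render_js.")
            raise RuntimeError(msg) from exc
        return sync_playwright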
@@ -200,12 +263,7 @@
 
          parsed = urlparse(url)
          hostname = parsed.hostname or ""
-         cookie_payload = []
-         if hostname:
-             for cookie in self.session.cookies:
-                 payload = self._playwright_cookie_payload(cookie, hostname)
-                 if payload:
-                     cookie_payload.append(payload)
+         cookie_payload = self._collect_playwright_cookies(hostname)
 
          content = ""
          final_url = url
@@ -219,42 +277,26 @@
                  java_script_enabled=True,
                  ignore_https_errors=not self.verify_ssl,
              )
-             if cookie_payload:
-                 context.add_cookies(cookie_payload)
-             page = context.new_page()
-
-             navigation_error = None
-             response = None
              try:
-                 try:
-                     response = page.goto(url, wait_until=wait_until, timeout=timeout_ms)
-                     if wait_selector:
-                         page.wait_for_selector(wait_selector, timeout=timeout_ms)
-                 except PlaywrightTimeoutError as exc:
-                     navigation_error = exc
-
-                 try:
-                     content = page.content()
-                 except Exception:
-                     content = ""
-
-                 # Always persist Playwright cookies back into the requests session.
-                 for cookie in context.cookies():
-                     self.session.cookies.set(
-                         cookie["name"],
-                         cookie["value"],
-                         domain=cookie.get("domain"),
-                         path=cookie.get("path", "/"),
-                     )
-
-                 final_url = page.url
-                 status = response.status if response is not None else 200
-                 headers = CaseInsensitiveDict(response.headers if response is not None else {})
-                 if "content-type" not in headers:
-                     headers["Content-Type"] = "text/html; charset=utf-8"
-
+                 self._add_cookies_to_context(context, cookie_payload)
+                 page = context.new_page()
+
+                 response, navigation_error = self._navigate_playwright_page(
+                     page,
+                     url,
+                     wait_selector,
+                     wait_until,
+                     timeout_ms,
+                     PlaywrightTimeoutError,
+                 )
+                 content = self._safe_page_content(page)
+                 self._sync_cookies_from_context(context)
+
+                 final_url, status, headers = self._rendered_response_metadata(page, response)
                  if navigation_error and not content:
-                     raise requests.exceptions.Timeout(f"Playwright timed out while rendering {url}") from navigation_error
+                     msg = f"Playwright timed out while rendering {url}"
+                     UserMessage(msg)
+                     raise requests.exceptions.Timeout(msg) from navigation_error
              finally:
                  context.close()
                  browser.close()
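
Note: the refactor moves the context setup inside the `try` so the `finally` block always closes the context and browser, and routes each step through the helpers added earlier. The synthetic `rendered_response` returned in the next hunk is built from `content`, `final_url`, `status`, and `headers`; the construction itself is not shown in this diff, but a plausible sketch (an illustrative guess, not the library's actual code) looks like:

    import requests
    from requests.structures import CaseInsensitiveDict

    def build_rendered_response(content: str, final_url: str, status: int,
                                headers: CaseInsensitiveDict) -> requests.Response:
        # Hand-assemble a Response so the Playwright path stays
        # type-compatible with the plain session.get() path.
        resp = requests.Response()
        resp._content = content.encode("utf-8")
        resp.status_code = status
        resp.url = final_url
        resp.headers = headers
        resp.encoding = "utf-8"
        return resp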
@@ -268,7 +310,7 @@
          return rendered_response
 
      def id(self) -> str:
-         return 'webscraping'
+         return "scrape"
 
      def forward(self, argument):
          """
@@ -284,8 +326,11 @@
          self._maybe_set_bypass_cookies(url)
 
          parsed = urlparse(url)
-         qs = [(k, v) for k, v in parse_qsl(parsed.query, keep_blank_values=True)
-               if k.lower() not in {"utm_source", "utm_medium", "utm_campaign"}]
+         qs = [
+             (k, v)
+             for k, v in parse_qsl(parsed.query, keep_blank_values=True)
+             if k.lower() not in {"utm_source", "utm_medium", "utm_campaign"}
+         ]
          clean_url = urlunparse(parsed._replace(query=urlencode(qs)))
 
          render_js = kwargs.get("render_js")
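
Note: the tracking-parameter strip is pure stdlib and unchanged in behavior by the reflow above; a self-contained sketch of the same round trip:

    from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse

    def strip_utm(url: str) -> str:
        # Drop common UTM tracking parameters while keeping everything else,
        # including blank values, exactly as the engine's forward() does.
        parsed = urlparse(url)
        qs = [(k, v) for k, v in parse_qsl(parsed.query, keep_blank_values=True)
              if k.lower() not in {"utm_source", "utm_medium", "utm_campaign"}]
        return urlunparse(parsed._replace(query=urlencode(qs)))

    assert strip_utm("https://ex.org/p?id=7&utm_source=x") == "https://ex.org/p?id=7"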
@@ -302,7 +347,9 @@
                  timeout=render_timeout,
              )
          else:
-             resp = self.session.get(clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl)
+             resp = self.session.get(
+                 clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl
+             )
          resp.raise_for_status()
 
          # Follow a legacy meta refresh once (do AFTER normal HTTP redirects)
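
Note: `_follow_meta_refresh` (its docstring appears in an earlier hunk) handles the legacy `<meta http-equiv="refresh" content="0;url=...">` redirect that HTTP-level redirect handling misses. A minimal sketch of the detection step, assuming a regex-based approach as the engine's `re` import suggests; the pattern and helper name are illustrative, not the engine's:

    import re
    from urllib.parse import urljoin

    META_REFRESH = re.compile(
        r'<meta[^>]+http-equiv=["\']?refresh["\']?[^>]*content=["\']?\s*\d+\s*;\s*url=([^"\'>]+)',
        re.IGNORECASE,
    )

    def meta_refresh_target(html: str, base_url: str) -> str | None:
        # Return the absolute redirect target if the page declares one, else None.
        match = META_REFRESH.search(html)
        return urljoin(base_url, match.group(1).strip()) if match else None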