symbolicai-1.0.0-py3-none-any.whl → symbolicai-1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. symai/__init__.py +198 -134
  2. symai/backend/base.py +51 -51
  3. symai/backend/engines/drawing/engine_bfl.py +33 -33
  4. symai/backend/engines/drawing/engine_gpt_image.py +4 -10
  5. symai/backend/engines/embedding/engine_llama_cpp.py +50 -35
  6. symai/backend/engines/embedding/engine_openai.py +22 -16
  7. symai/backend/engines/execute/engine_python.py +16 -16
  8. symai/backend/engines/files/engine_io.py +51 -49
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +27 -23
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +53 -46
  11. symai/backend/engines/index/engine_pinecone.py +116 -88
  12. symai/backend/engines/index/engine_qdrant.py +1011 -0
  13. symai/backend/engines/index/engine_vectordb.py +78 -52
  14. symai/backend/engines/lean/engine_lean4.py +65 -25
  15. symai/backend/engines/neurosymbolic/__init__.py +28 -28
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +137 -135
  17. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +145 -152
  18. symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
  19. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +75 -49
  20. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +199 -155
  21. symai/backend/engines/neurosymbolic/engine_groq.py +106 -72
  22. symai/backend/engines/neurosymbolic/engine_huggingface.py +100 -67
  23. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +121 -93
  24. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +213 -132
  25. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +180 -137
  26. symai/backend/engines/ocr/engine_apilayer.py +18 -20
  27. symai/backend/engines/output/engine_stdout.py +9 -9
  28. symai/backend/engines/{webscraping → scrape}/engine_requests.py +25 -11
  29. symai/backend/engines/search/engine_openai.py +95 -83
  30. symai/backend/engines/search/engine_parallel.py +665 -0
  31. symai/backend/engines/search/engine_perplexity.py +40 -41
  32. symai/backend/engines/search/engine_serpapi.py +33 -28
  33. symai/backend/engines/speech_to_text/engine_local_whisper.py +37 -27
  34. symai/backend/engines/symbolic/engine_wolframalpha.py +14 -8
  35. symai/backend/engines/text_to_speech/engine_openai.py +15 -19
  36. symai/backend/engines/text_vision/engine_clip.py +34 -28
  37. symai/backend/engines/userinput/engine_console.py +3 -4
  38. symai/backend/mixin/anthropic.py +48 -40
  39. symai/backend/mixin/deepseek.py +4 -5
  40. symai/backend/mixin/google.py +5 -4
  41. symai/backend/mixin/groq.py +2 -4
  42. symai/backend/mixin/openai.py +132 -110
  43. symai/backend/settings.py +14 -14
  44. symai/chat.py +164 -94
  45. symai/collect/dynamic.py +13 -11
  46. symai/collect/pipeline.py +39 -31
  47. symai/collect/stats.py +109 -69
  48. symai/components.py +556 -238
  49. symai/constraints.py +14 -5
  50. symai/core.py +1495 -1210
  51. symai/core_ext.py +55 -50
  52. symai/endpoints/api.py +113 -58
  53. symai/extended/api_builder.py +22 -17
  54. symai/extended/arxiv_pdf_parser.py +13 -5
  55. symai/extended/bibtex_parser.py +8 -4
  56. symai/extended/conversation.py +88 -69
  57. symai/extended/document.py +40 -27
  58. symai/extended/file_merger.py +45 -7
  59. symai/extended/graph.py +38 -24
  60. symai/extended/html_style_template.py +17 -11
  61. symai/extended/interfaces/blip_2.py +1 -1
  62. symai/extended/interfaces/clip.py +4 -2
  63. symai/extended/interfaces/console.py +5 -3
  64. symai/extended/interfaces/dall_e.py +3 -1
  65. symai/extended/interfaces/file.py +2 -0
  66. symai/extended/interfaces/flux.py +3 -1
  67. symai/extended/interfaces/gpt_image.py +15 -6
  68. symai/extended/interfaces/input.py +2 -1
  69. symai/extended/interfaces/llava.py +1 -1
  70. symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +3 -2
  71. symai/extended/interfaces/naive_vectordb.py +2 -2
  72. symai/extended/interfaces/ocr.py +4 -2
  73. symai/extended/interfaces/openai_search.py +2 -0
  74. symai/extended/interfaces/parallel.py +30 -0
  75. symai/extended/interfaces/perplexity.py +2 -0
  76. symai/extended/interfaces/pinecone.py +6 -4
  77. symai/extended/interfaces/python.py +2 -0
  78. symai/extended/interfaces/serpapi.py +2 -0
  79. symai/extended/interfaces/terminal.py +0 -1
  80. symai/extended/interfaces/tts.py +2 -1
  81. symai/extended/interfaces/whisper.py +2 -1
  82. symai/extended/interfaces/wolframalpha.py +1 -0
  83. symai/extended/metrics/__init__.py +1 -1
  84. symai/extended/metrics/similarity.py +5 -2
  85. symai/extended/os_command.py +31 -22
  86. symai/extended/packages/symdev.py +39 -34
  87. symai/extended/packages/sympkg.py +30 -27
  88. symai/extended/packages/symrun.py +46 -35
  89. symai/extended/repo_cloner.py +10 -9
  90. symai/extended/seo_query_optimizer.py +15 -12
  91. symai/extended/solver.py +104 -76
  92. symai/extended/summarizer.py +8 -7
  93. symai/extended/taypan_interpreter.py +10 -9
  94. symai/extended/vectordb.py +28 -15
  95. symai/formatter/formatter.py +39 -31
  96. symai/formatter/regex.py +46 -44
  97. symai/functional.py +184 -86
  98. symai/imports.py +85 -51
  99. symai/interfaces.py +1 -1
  100. symai/memory.py +33 -24
  101. symai/menu/screen.py +28 -19
  102. symai/misc/console.py +27 -27
  103. symai/misc/loader.py +4 -3
  104. symai/models/base.py +147 -76
  105. symai/models/errors.py +1 -1
  106. symai/ops/__init__.py +1 -1
  107. symai/ops/measures.py +17 -14
  108. symai/ops/primitives.py +933 -635
  109. symai/post_processors.py +28 -24
  110. symai/pre_processors.py +58 -52
  111. symai/processor.py +15 -9
  112. symai/prompts.py +714 -649
  113. symai/server/huggingface_server.py +115 -32
  114. symai/server/llama_cpp_server.py +14 -6
  115. symai/server/qdrant_server.py +206 -0
  116. symai/shell.py +98 -39
  117. symai/shellsv.py +307 -223
  118. symai/strategy.py +135 -81
  119. symai/symbol.py +276 -225
  120. symai/utils.py +62 -46
  121. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +19 -9
  122. symbolicai-1.1.0.dist-info/RECORD +168 -0
  123. symbolicai-1.0.0.dist-info/RECORD +0 -163
  124. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
  125. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
  126. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
  127. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
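Note on the `webscraping → scrape` rename (items 28 and 70 above, and the hunks below): the engine module moves to `symai/backend/engines/scrape/`, the engine id returned by `RequestsEngine.id()` changes from 'webscraping' to 'scrape', the `naive_webscraping` interface becomes `naive_scrape`, and the install extra referenced in the Playwright error message becomes `symbolicai[scrape]`. A minimal before/after sketch of user code; the `Interface` import path and call signature are assumed from earlier releases of the library, not verified against this wheel:

    # Hypothetical migration sketch for the webscraping -> scrape rename.
    from symai.interfaces import Interface  # assumed import path

    # 1.0.0
    # scraper = Interface('naive_webscraping')
    # 1.1.0
    scraper = Interface('naive_scrape')
    page = scraper('https://example.com')  # fetches and returns the page content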
symai/backend/engines/ocr/engine_apilayer.py
@@ -13,9 +13,11 @@ class ApiLayerResult(Result):
         self.raw = text
         try:
             dict_ = self._to_symbol(text).ast()
-            self._value = dict_.get('all_text', f'OCR Engine Error: {text} - status code {status_code}')
+            self._value = dict_.get(
+                "all_text", f"OCR Engine Error: {text} - status code {status_code}"
+            )
         except Exception:
-            self._value = f'OCR Engine Error: {text} - status code {status_code}'
+            self._value = f"OCR Engine Error: {text} - status code {status_code}"


 class OCREngine(Engine):
@@ -23,22 +25,18 @@ class OCREngine(Engine):
         super().__init__()
         # Opening JSON file
         self.config = SYMAI_CONFIG
-        self.headers = {
-            "apikey": self.config['OCR_ENGINE_API_KEY'] if api_key is None else api_key
-        }
+        self.headers = {"apikey": self.config["OCR_ENGINE_API_KEY"] if api_key is None else api_key}
         self.name = self.__class__.__name__

     def id(self) -> str:
-        if self.config['OCR_ENGINE_API_KEY']:
-            return 'ocr'
-        return super().id() # default to unregistered
+        if self.config["OCR_ENGINE_API_KEY"]:
+            return "ocr"
+        return super().id()  # default to unregistered

     def command(self, *args, **kwargs):
         super().command(*args, **kwargs)
-        if 'OCR_ENGINE_API_KEY' in kwargs:
-            self.headers = {
-                "apikey": kwargs['OCR_ENGINE_API_KEY']
-            }
+        if "OCR_ENGINE_API_KEY" in kwargs:
+            self.headers = {"apikey": kwargs["OCR_ENGINE_API_KEY"]}

     def forward(self, argument):
         image_url = argument.prop.image
@@ -47,21 +45,21 @@ class OCREngine(Engine):
             file_path = Path(image_url[7:]).resolve()
             with file_path.open("rb") as file:
                 payload = file.read()
-            url = "https://api.apilayer.com/image_to_text/upload"
+            url = "https://api.apilayer.com/image_to_text/upload"
             response = requests.request("POST", url, headers=self.headers, data=payload)
         else:
-            payload = {}
-            url = f"https://api.apilayer.com/image_to_text/url?url={image_url}"
-            response = requests.request("GET", url, headers=self.headers, data = payload)
+            payload = {}
+            url = f"https://api.apilayer.com/image_to_text/url?url={image_url}"
+            response = requests.request("GET", url, headers=self.headers, data=payload)

         status_code = response.status_code
-        rsp = response.text
-        rsp = ApiLayerResult(response.text, status_code)
-        metadata = {}
+        rsp = response.text
+        rsp = ApiLayerResult(response.text, status_code)
+        metadata = {}

         return [rsp], metadata

     def prepare(self, argument):
         assert not argument.prop.processed_input, "OCREngine does not support processed_input."
-        image = str(argument.prop.image)
+        image = str(argument.prop.image)
         argument.prop.prepared_input = image
symai/backend/engines/output/engine_stdout.py
@@ -7,10 +7,10 @@ class OutputEngine(Engine):
         self.name = self.__class__.__name__

     def id(self) -> str:
-        return 'output'
+        return "output"

     def forward(self, argument):
-        expr, processed, args, kwargs = argument.prop.prepared_input
+        expr, processed, args, kwargs = argument.prop.prepared_input
         res = None
         args = [] if args is None else args
         kwargs = {} if kwargs is None else kwargs
@@ -18,14 +18,14 @@ class OutputEngine(Engine):
         res = expr(processed, *args, **kwargs) if processed else expr(*args, **kwargs)

         metadata = {}
-        result = {
-            'result': res,
-            'processed': processed,
-            'args': args,
-            'kwargs': kwargs
-        }
+        result = {"result": res, "processed": processed, "args": args, "kwargs": kwargs}

         return [result], metadata

     def prepare(self, argument):
-        argument.prop.prepared_input = argument.prop.expr, argument.prop.processed_input, argument.prop.args, argument.prop.kwargs
+        argument.prop.prepared_input = (
+            argument.prop.expr,
+            argument.prop.processed_input,
+            argument.prop.args,
+            argument.prop.kwargs,
+        )
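Both hunks are behavior-preserving reformatting: `prepare` still packs a 4-tuple into `prepared_input`, and `forward` still unpacks it, normalizes the default `args`/`kwargs`, invokes the wrapped expression, and wraps everything in a result dict. A toy illustration of that contract; `SimpleNamespace` stands in for symai's argument object, and the names here are hypothetical:

    # Toy illustration of the OutputEngine prepared_input contract.
    from types import SimpleNamespace

    prop = SimpleNamespace(
        expr=lambda x, scale=1: x * scale,  # the wrapped expression
        processed_input=21,                 # value forwarded to expr
        args=None,
        kwargs={"scale": 2},
    )

    # prepare(): pack the 4-tuple
    prepared = (prop.expr, prop.processed_input, prop.args, prop.kwargs)

    # forward(): unpack, normalize defaults, call, wrap
    expr, processed, args, kwargs = prepared
    args = [] if args is None else args
    kwargs = {} if kwargs is None else kwargs
    res = expr(processed, *args, **kwargs) if processed else expr(*args, **kwargs)
    result = {"result": res, "processed": processed, "args": args, "kwargs": kwargs}
    assert result["result"] == 42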
symai/backend/engines/{webscraping → scrape}/engine_requests.py
@@ -67,8 +67,8 @@ class RequestsEngine(Engine):

     DEFAULT_HEADERS: ClassVar[dict[str, str]] = {
         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
-        "AppleWebKit/537.36 (KHTML, like Gecko) "
-        "Chrome/120.0.0.0 Safari/537.36",
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/120.0.0.0 Safari/537.36",
         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
         "Accept-Language": "en-US,en;q=0.9",
         "DNT": "1",
@@ -174,7 +174,9 @@ class RequestsEngine(Engine):
         context.add_cookies(cookie_payload)

     @staticmethod
-    def _navigate_playwright_page(page, url: str, wait_selector: str | None, wait_until: str, timeout_ms: int, timeout_error):
+    def _navigate_playwright_page(
+        page, url: str, wait_selector: str | None, wait_until: str, timeout_ms: int, timeout_error
+    ):
        try:
            response = page.goto(url, wait_until=wait_until, timeout=timeout_ms)
            if wait_selector:
@@ -232,7 +234,13 @@ class RequestsEngine(Engine):
                return resp
        return self.session.get(target, timeout=timeout, allow_redirects=True)

-    def _fetch_with_playwright(self, url: str, wait_selector: str | None = None, wait_until: str = "networkidle", timeout: float | None = None):
+    def _fetch_with_playwright(
+        self,
+        url: str,
+        wait_selector: str | None = None,
+        wait_until: str = "networkidle",
+        timeout: float | None = None,
+    ):
        """
        Render the target URL in a headless browser to execute JavaScript and
        return a synthetic ``requests.Response`` object to keep downstream
@@ -240,11 +248,12 @@
        """
        try:
            # Playwright is optional; import only when JS rendering is requested.
-            from playwright.sync_api import TimeoutError as PlaywrightTimeoutError # noqa
-            from playwright.sync_api import sync_playwright # noqa
+            from playwright.sync_api import TimeoutError as PlaywrightTimeoutError  # noqa
+            from playwright.sync_api import sync_playwright  # noqa
+
            logging.getLogger("playwright").setLevel(logging.WARNING)
        except ImportError as exc:
-            msg = "Playwright is not installed. Install symbolicai[webscraping] with Playwright extras to enable render_js."
+            msg = "Playwright is not installed. Install symbolicai[scrape] with Playwright extras to enable render_js."
            UserMessage(msg)
            raise RuntimeError(msg) from exc

@@ -301,7 +310,7 @@ class RequestsEngine(Engine):
        return rendered_response

    def id(self) -> str:
-        return 'webscraping'
+        return "scrape"

    def forward(self, argument):
        """
@@ -317,8 +326,11 @@ class RequestsEngine(Engine):
        self._maybe_set_bypass_cookies(url)

        parsed = urlparse(url)
-        qs = [(k, v) for k, v in parse_qsl(parsed.query, keep_blank_values=True)
-              if k.lower() not in {"utm_source", "utm_medium", "utm_campaign"}]
+        qs = [
+            (k, v)
+            for k, v in parse_qsl(parsed.query, keep_blank_values=True)
+            if k.lower() not in {"utm_source", "utm_medium", "utm_campaign"}
+        ]
        clean_url = urlunparse(parsed._replace(query=urlencode(qs)))

        render_js = kwargs.get("render_js")
@@ -335,7 +347,9 @@ class RequestsEngine(Engine):
                timeout=render_timeout,
            )
        else:
-            resp = self.session.get(clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl)
+            resp = self.session.get(
+                clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl
+            )
        resp.raise_for_status()

        # Follow a legacy meta refresh once (do AFTER normal HTTP redirects)
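Functionally, `forward` now strips `utm_*` tracking parameters before the fetch and, when `render_js` is set, routes through `_fetch_with_playwright` instead of the plain `requests` session. A hedged caller sketch; the `render_js` kwarg comes from the diff, but the interface surface is an assumption:

    # Hypothetical caller sketch for the scrape engine.
    from symai.interfaces import Interface  # assumed import path

    scrape = Interface('naive_scrape')
    html = scrape('https://example.com')                      # plain requests.Session GET
    html_js = scrape('https://example.com', render_js=True)   # headless-browser render;
    # requires the optional Playwright dependency (symbolicai[scrape] extras)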
symai/backend/engines/search/engine_openai.py
@@ -21,9 +21,10 @@ logging.getLogger("httpcore").setLevel(logging.ERROR)


 TRACKING_KEYS = {
-    "utm_source" # so far I've only seen this one
+    "utm_source"  # so far I've only seen this one
 }

+
 @dataclass
 class Citation:
     id: int
@@ -33,14 +34,14 @@ class Citation:
     end: int

     def __hash__(self):
-        return hash((self.url, ))
+        return hash((self.url,))


 class SearchResult(Result):
     def __init__(self, value, **kwargs) -> None:
         super().__init__(value, **kwargs)
-        if value.get('error'):
-            UserMessage(value['error'], raise_with=ValueError)
+        if value.get("error"):
+            UserMessage(value["error"], raise_with=ValueError)
         try:
             text, annotations = self._extract_text_and_annotations(value)
             if text is None:
@@ -50,7 +51,9 @@ class SearchResult(Result):
             replaced_text, ordered, starts_ends = self._insert_citation_markers(text, annotations)
             self._value = replaced_text
             self._citations = [
-                Citation(id=cid, title=title, url=url, start=starts_ends[cid][0], end=starts_ends[cid][1])
+                Citation(
+                    id=cid, title=title, url=url, start=starts_ends[cid][0], end=starts_ends[cid][1]
+                )
                 for cid, title, url in ordered
             ]

@@ -59,70 +62,72 @@
             UserMessage(f"Failed to parse response: {e}", raise_with=ValueError)

     def _extract_text(self, value) -> str | None:
-        if isinstance(value.get('output_text'), str) and value.get('output_text'):
-            return value.get('output_text')
+        if isinstance(value.get("output_text"), str) and value.get("output_text"):
+            return value.get("output_text")
         text = None
-        for output in value.get('output', []):
-            if output.get('type') == 'message' and output.get('content'):
-                content0 = output['content'][0]
-                if content0.get('text'):
-                    text = content0['text']
+        for output in value.get("output", []):
+            if output.get("type") == "message" and output.get("content"):
+                content0 = output["content"][0]
+                if content0.get("text"):
+                    text = content0["text"]
         return text

     def _extract_text_and_annotations(self, value):
         segments = []
         global_annotations = []
         pos = 0
-        for output in value.get('output', []) or []:
-            if output.get('type') != 'message' or not output.get('content'):
+        for output in value.get("output", []) or []:
+            if output.get("type") != "message" or not output.get("content"):
                 continue
-            for content in output.get('content', []) or []:
-                seg_text = content.get('text') or ''
+            for content in output.get("content", []) or []:
+                seg_text = content.get("text") or ""
                 if not isinstance(seg_text, str):
                     continue
-                for ann in (content.get('annotations') or []):
-                    if ann.get('type') == 'url_citation' and ann.get('url'):
-                        start = ann.get('start_index', 0)
-                        end = ann.get('end_index', 0)
-                        global_annotations.append({
-                            'type': 'url_citation',
-                            'url': ann.get('url'),
-                            'title': (ann.get('title') or '').strip(),
-                            'start_index': pos + int(start),
-                            'end_index': pos + int(end),
-                        })
+                for ann in content.get("annotations") or []:
+                    if ann.get("type") == "url_citation" and ann.get("url"):
+                        start = ann.get("start_index", 0)
+                        end = ann.get("end_index", 0)
+                        global_annotations.append(
+                            {
+                                "type": "url_citation",
+                                "url": ann.get("url"),
+                                "title": (ann.get("title") or "").strip(),
+                                "start_index": pos + int(start),
+                                "end_index": pos + int(end),
+                            }
+                        )
                 segments.append(seg_text)
                 pos += len(seg_text)

-        built_text = ''.join(segments) if segments else None
+        built_text = "".join(segments) if segments else None
         # Prefer top-level output_text if present AND segments are empty (no way to compute indices)
-        if not built_text and isinstance(value.get('output_text'), str):
-            return value.get('output_text'), []
+        if not built_text and isinstance(value.get("output_text"), str):
+            return value.get("output_text"), []
         return built_text, global_annotations

     def _normalize_url(self, u: str) -> str:
         parts = urlsplit(u)
         scheme = parts.scheme.lower()
         netloc = parts.netloc.lower()
-        path = parts.path.rstrip('/') or '/'
+        path = parts.path.rstrip("/") or "/"
         q = []
         for k, v in parse_qsl(parts.query, keep_blank_values=True):
             kl = k.lower()
-            if kl in TRACKING_KEYS or kl.startswith('utm_'):
+            if kl in TRACKING_KEYS or kl.startswith("utm_"):
                 continue
             q.append((k, v))
         query = urlencode(q, doseq=True)
-        fragment = ''
+        fragment = ""
         return urlunsplit((scheme, netloc, path, query, fragment))

     def _make_title_map(self, annotations):
         m = {}
         for a in annotations or []:
-            url = a.get('url')
+            url = a.get("url")
             if not url:
                 continue
             nu = self._normalize_url(url)
-            title = (a.get('title') or '').strip()
+            title = (a.get("title") or "").strip()
             if nu not in m and title:
                 m[nu] = title
         return m
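`_normalize_url` is the deduplication key for citations: lowercase scheme and host, trailing-slash-insensitive path, tracking parameters (anything in `TRACKING_KEYS` or prefixed `utm_`) dropped, fragment removed. A standalone copy of that logic to show the effect on a sample URL (not the class method itself):

    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    TRACKING_KEYS = {"utm_source"}

    def normalize_url(u: str) -> str:
        # Same steps as SearchResult._normalize_url in the hunk above.
        parts = urlsplit(u)
        path = parts.path.rstrip("/") or "/"
        q = [(k, v) for k, v in parse_qsl(parts.query, keep_blank_values=True)
             if k.lower() not in TRACKING_KEYS and not k.lower().startswith("utm_")]
        return urlunsplit((parts.scheme.lower(), parts.netloc.lower(), path,
                           urlencode(q, doseq=True), ""))

    print(normalize_url("HTTPS://Example.com/Docs/?utm_source=x&q=1#frag"))
    # -> https://example.com/Docs?q=1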
@@ -131,7 +136,7 @@ class SearchResult(Result):
         return urlsplit(u).netloc

     def _short_hash_id(self, nu: str, length=6) -> str:
-        return hashlib.sha1(nu.encode('utf-8')).hexdigest()[:length]
+        return hashlib.sha1(nu.encode("utf-8")).hexdigest()[:length]

     def _insert_citation_markers(self, text: str, annotations):
         title_map = self._make_title_map(annotations)
@@ -140,8 +145,10 @@ class SearchResult(Result):
         ordered: list[tuple[int, str, str]] = []  # (id, title, normalized_url)
         next_id = 1

-        url_anns = [a for a in annotations or [] if a.get('type') == 'url_citation' and a.get('url')]
-        url_anns.sort(key=lambda a: int(a.get('start_index', 0)))
+        url_anns = [
+            a for a in annotations or [] if a.get("type") == "url_citation" and a.get("url")
+        ]
+        url_anns.sort(key=lambda a: int(a.get("start_index", 0)))

         pieces: list[str] = []
         cursor = 0
@@ -158,11 +165,11 @@ class SearchResult(Result):
             return id_map[nu]

         for ann in url_anns:
-            start = int(ann.get('start_index', 0))
-            end = int(ann.get('end_index', 0))
+            start = int(ann.get("start_index", 0))
+            end = int(ann.get("end_index", 0))
             if end <= cursor:
                 continue  # skip overlapping or backwards spans
-            url = ann.get('url')
+            url = ann.get("url")
             nu = self._normalize_url(url)
             cid = _get_id(nu)
             title = title_map.get(nu) or self._hostname(nu)
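`_insert_citation_markers` walks the url_citation annotations in `start_index` order, assigns each normalized URL a stable numeric id on first sight, and skips spans that end before the current cursor. A condensed standalone sketch of that walk; the `[n]` marker format and the span handling are inferred from the hunks (the real method also strips markdown links and records first spans):

    # Condensed sketch of the id-assignment-and-splice loop.
    def insert_markers(text, annotations, normalize=lambda u: u):
        id_map, ordered, pieces, cursor, next_id = {}, [], [], 0, 1
        anns = sorted(
            (a for a in annotations if a.get("type") == "url_citation" and a.get("url")),
            key=lambda a: int(a.get("start_index", 0)),
        )
        for ann in anns:
            start, end = int(ann.get("start_index", 0)), int(ann.get("end_index", 0))
            if end <= cursor:
                continue  # overlapping or backwards span
            nu = normalize(ann["url"])
            if nu not in id_map:
                id_map[nu], next_id = next_id, next_id + 1
                ordered.append((id_map[nu], ann.get("title", ""), nu))
            pieces.append(text[cursor:start])   # text before the citation span
            pieces.append(f"[{id_map[nu]}]")    # marker replaces the span itself
            cursor = end
        pieces.append(text[cursor:])
        return "".join(pieces), ordered

    text = "Water boils at 100 C at sea level [source]."
    s = text.index("[source]")
    anns = [{"type": "url_citation", "url": "https://example.org/boiling",
             "title": "Example", "start_index": s, "end_index": s + len("[source]")}]
    print(insert_markers(text, anns)[0])  # Water boils at 100 C at sea level [1].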
@@ -189,7 +196,7 @@ class SearchResult(Result):

         tail_clean = self._strip_markdown_links(text[cursor:])
         pieces.append(tail_clean)
-        replaced = ''.join(pieces)
+        replaced = "".join(pieces)

         starts_ends = {cid: first_span.get(cid, (0, 0)) for cid, _, _ in ordered}
         return replaced, ordered, starts_ends
@@ -197,15 +204,15 @@ class SearchResult(Result):
     def _strip_markdown_links(self, text: str) -> str:
         # Remove ([text](http...)) including surrounding parentheses
         pattern_paren = re.compile(r"\(\s*\[[^\]]+\]\(https?://[^)]+\)\s*\)")
-        text = pattern_paren.sub('', text)
+        text = pattern_paren.sub("", text)
         # Remove bare [text](http...)
         pattern_bare = re.compile(r"\[[^\]]+\]\(https?://[^)]+\)")
-        text = pattern_bare.sub('', text)
+        text = pattern_bare.sub("", text)
         # Remove parentheses that became empty or contain only commas/whitespace like (, , )
         pattern_empty_paren = re.compile(r"\(\s*\)")
-        text = pattern_empty_paren.sub('', text)
+        text = pattern_empty_paren.sub("", text)
         pattern_commas_only = re.compile(r"\(\s*(,\s*)+\)")
-        text = pattern_commas_only.sub('', text)
+        text = pattern_commas_only.sub("", text)
         # Collapse potential double spaces resulting from removals
         return re.sub(r"\s{2,}", " ", text).strip()

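The four regexes remove parenthesized and bare markdown links, then clean up empty or comma-only parentheses left behind and collapse double spaces. Reproduced standalone for illustration:

    import re

    def strip_markdown_links(text: str) -> str:
        # Same regex pipeline as SearchResult._strip_markdown_links above.
        text = re.sub(r"\(\s*\[[^\]]+\]\(https?://[^)]+\)\s*\)", "", text)  # ([t](url))
        text = re.sub(r"\[[^\]]+\]\(https?://[^)]+\)", "", text)            # [t](url)
        text = re.sub(r"\(\s*\)", "", text)                                 # ()
        text = re.sub(r"\(\s*(,\s*)+\)", "", text)                          # (, ,)
        return re.sub(r"\s{2,}", " ", text).strip()

    print(strip_markdown_links("See the docs ([guide](https://ex.org/g)) for details."))
    # -> See the docs for details.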
@@ -236,10 +243,12 @@ class GPTXSearchEngine(Engine):
         super().__init__()
         self.config = deepcopy(SYMAI_CONFIG)
         if api_key is not None and model is not None:
-            self.config['SEARCH_ENGINE_API_KEY'] = api_key
-            self.config['SEARCH_ENGINE_MODEL'] = model
-        self.api_key = self.config.get('SEARCH_ENGINE_API_KEY')
-        self.model = self.config.get('SEARCH_ENGINE_MODEL', 'gpt-4.1') # Default to gpt-4.1 as per docs
+            self.config["SEARCH_ENGINE_API_KEY"] = api_key
+            self.config["SEARCH_ENGINE_MODEL"] = model
+        self.api_key = self.config.get("SEARCH_ENGINE_API_KEY")
+        self.model = self.config.get(
+            "SEARCH_ENGINE_MODEL", "gpt-4.1"
+        )  # Default to gpt-4.1 as per docs
         self.name = self.__class__.__name__
         try:
             self.client = OpenAI(api_key=self.api_key)
@@ -247,9 +256,12 @@ class GPTXSearchEngine(Engine):
             UserMessage(f"Failed to initialize OpenAI client: {e}", raise_with=ValueError)

     def id(self) -> str:
-        if self.config.get('SEARCH_ENGINE_API_KEY') and \
-           self.config.get('SEARCH_ENGINE_MODEL') in OPENAI_CHAT_MODELS + OPENAI_REASONING_MODELS:
-            return 'search'
+        if (
+            self.config.get("SEARCH_ENGINE_API_KEY")
+            and self.config.get("SEARCH_ENGINE_MODEL")
+            in OPENAI_CHAT_MODELS + OPENAI_REASONING_MODELS
+        ):
+            return "search"
         return super().id()  # default to unregistered

     def _extract_netloc(self, raw_domain: str | None) -> str | None:
@@ -258,15 +270,15 @@ class GPTXSearchEngine(Engine):
         candidate = raw_domain.strip()
         if not candidate:
             return None
-        parsed = urlsplit(candidate if '://' in candidate else f"//{candidate}")
+        parsed = urlsplit(candidate if "://" in candidate else f"//{candidate}")
         netloc = parsed.netloc or parsed.path
         if not netloc:
             return None
-        if '@' in netloc:
-            netloc = netloc.split('@', 1)[1]
-        if ':' in netloc:
-            netloc = netloc.split(':', 1)[0]
-        netloc = netloc.strip('.').strip()
+        if "@" in netloc:
+            netloc = netloc.split("@", 1)[1]
+        if ":" in netloc:
+            netloc = netloc.split(":", 1)[0]
+        netloc = netloc.strip(".").strip()
         if not netloc:
             return None
         return netloc.lower()
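`_extract_netloc` reduces a user-supplied domain filter to a bare lowercase hostname: it tolerates a missing scheme (the `//` prefix trick makes `urlsplit` parse a bare domain as a netloc) and strips credentials, ports, and stray dots. A standalone copy for illustration:

    from urllib.parse import urlsplit

    def extract_netloc(raw: str | None) -> str | None:
        # Same normalization steps as GPTXSearchEngine._extract_netloc above.
        if raw is None or not raw.strip():
            return None
        candidate = raw.strip()
        parsed = urlsplit(candidate if "://" in candidate else f"//{candidate}")
        netloc = parsed.netloc or parsed.path
        if "@" in netloc:
            netloc = netloc.split("@", 1)[1]  # drop credentials
        if ":" in netloc:
            netloc = netloc.split(":", 1)[0]  # drop port
        netloc = netloc.strip(".").strip()
        return netloc.lower() or None

    print(extract_netloc("https://user@Example.COM:8080/docs"))  # example.com
    print(extract_netloc("docs.python.org."))                    # docs.python.org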
@@ -313,38 +325,40 @@ class GPTXSearchEngine(Engine):

     def command(self, *args, **kwargs):
         super().command(*args, **kwargs)
-        if 'SEARCH_ENGINE_API_KEY' in kwargs:
-            self.api_key = kwargs['SEARCH_ENGINE_API_KEY']
-        if 'SEARCH_ENGINE_MODEL' in kwargs:
-            self.model = kwargs['SEARCH_ENGINE_MODEL']
+        if "SEARCH_ENGINE_API_KEY" in kwargs:
+            self.api_key = kwargs["SEARCH_ENGINE_API_KEY"]
+        if "SEARCH_ENGINE_MODEL" in kwargs:
+            self.model = kwargs["SEARCH_ENGINE_MODEL"]

     def forward(self, argument):
         messages = argument.prop.prepared_input
         kwargs = argument.kwargs

         tool_definition = {"type": "web_search"}
-        user_location = kwargs.get('user_location')
+        user_location = kwargs.get("user_location")
         if user_location:
-            tool_definition['user_location'] = user_location
+            tool_definition["user_location"] = user_location

-        allowed_domains = self._normalize_allowed_domains(kwargs.get('allowed_domains'))
+        allowed_domains = self._normalize_allowed_domains(kwargs.get("allowed_domains"))
         if allowed_domains:
-            tool_definition['filters'] = {
-                'allowed_domains': allowed_domains
-            }
+            tool_definition["filters"] = {"allowed_domains": allowed_domains}

-        self.model = kwargs.get('model', self.model) # Important for MetadataTracker to work correctly
+        self.model = kwargs.get(
+            "model", self.model
+        )  # Important for MetadataTracker to work correctly

         payload = {
             "model": self.model,
             "input": messages,
             "tools": [tool_definition],
-            "tool_choice": {"type": "web_search"} if self.model not in OPENAI_REASONING_MODELS else "auto" # force the use of web search tool for non-reasoning models
+            "tool_choice": {"type": "web_search"}
+            if self.model not in OPENAI_REASONING_MODELS
+            else "auto",  # force the use of web search tool for non-reasoning models
         }

         if self.model in OPENAI_REASONING_MODELS:
-            reasoning = kwargs.get('reasoning', { "effort": "low", "summary": "auto" })
-            payload['reasoning'] = reasoning
+            reasoning = kwargs.get("reasoning", {"effort": "low", "summary": "auto"})
+            payload["reasoning"] = reasoning

         try:
             res = self.client.responses.create(**payload)
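Two behavioral notes fall out of this hunk: web search is forced via `tool_choice` only for non-reasoning models, and reasoning models get a default `reasoning` block of low effort with auto summaries. The resulting Responses API payloads look roughly like this (model names and message content are illustrative):

    # Illustrative payloads implied by forward() above.
    payload_chat = {
        "model": "gpt-4.1",
        "input": [{"role": "system", "content": "You are a helpful AI assistant..."},
                  {"role": "user", "content": "latest LLM benchmarks"}],
        "tools": [{"type": "web_search"}],
        "tool_choice": {"type": "web_search"},  # forced for non-reasoning models
    }

    payload_reasoning = {
        "model": "o3",  # hypothetical reasoning-model name
        "input": payload_chat["input"],
        "tools": [{"type": "web_search"}],
        "tool_choice": "auto",
        "reasoning": {"effort": "low", "summary": "auto"},  # default per the diff
    }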
@@ -353,21 +367,19 @@ class GPTXSearchEngine(Engine):
             UserMessage(f"Failed to make request: {e}", raise_with=ValueError)

         metadata = {"raw_output": res.raw}
-        output = [res]
+        output = [res]

         return output, metadata

     def prepare(self, argument):
-        system_message = "You are a helpful AI assistant. Be precise and informative." if argument.kwargs.get('system_message') is None else argument.kwargs.get('system_message')
+        system_message = (
+            "You are a helpful AI assistant. Be precise and informative."
+            if argument.kwargs.get("system_message") is None
+            else argument.kwargs.get("system_message")
+        )

         res = [
-            {
-                "role": "system",
-                "content": system_message
-            },
-            {
-                "role": "user",
-                "content": f"{argument.prop.query}"
-            }
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": f"{argument.prop.query}"},
         ]
         argument.prop.prepared_input = res