symbolicai 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (127)
  1. symai/__init__.py +198 -134
  2. symai/backend/base.py +51 -51
  3. symai/backend/engines/drawing/engine_bfl.py +33 -33
  4. symai/backend/engines/drawing/engine_gpt_image.py +4 -10
  5. symai/backend/engines/embedding/engine_llama_cpp.py +50 -35
  6. symai/backend/engines/embedding/engine_openai.py +22 -16
  7. symai/backend/engines/execute/engine_python.py +16 -16
  8. symai/backend/engines/files/engine_io.py +51 -49
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +27 -23
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +53 -46
  11. symai/backend/engines/index/engine_pinecone.py +116 -88
  12. symai/backend/engines/index/engine_qdrant.py +1011 -0
  13. symai/backend/engines/index/engine_vectordb.py +78 -52
  14. symai/backend/engines/lean/engine_lean4.py +65 -25
  15. symai/backend/engines/neurosymbolic/__init__.py +28 -28
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +137 -135
  17. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +145 -152
  18. symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
  19. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +75 -49
  20. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +199 -155
  21. symai/backend/engines/neurosymbolic/engine_groq.py +106 -72
  22. symai/backend/engines/neurosymbolic/engine_huggingface.py +100 -67
  23. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +121 -93
  24. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +213 -132
  25. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +180 -137
  26. symai/backend/engines/ocr/engine_apilayer.py +18 -20
  27. symai/backend/engines/output/engine_stdout.py +9 -9
  28. symai/backend/engines/{webscraping → scrape}/engine_requests.py +25 -11
  29. symai/backend/engines/search/engine_openai.py +95 -83
  30. symai/backend/engines/search/engine_parallel.py +665 -0
  31. symai/backend/engines/search/engine_perplexity.py +40 -41
  32. symai/backend/engines/search/engine_serpapi.py +33 -28
  33. symai/backend/engines/speech_to_text/engine_local_whisper.py +37 -27
  34. symai/backend/engines/symbolic/engine_wolframalpha.py +14 -8
  35. symai/backend/engines/text_to_speech/engine_openai.py +15 -19
  36. symai/backend/engines/text_vision/engine_clip.py +34 -28
  37. symai/backend/engines/userinput/engine_console.py +3 -4
  38. symai/backend/mixin/anthropic.py +48 -40
  39. symai/backend/mixin/deepseek.py +4 -5
  40. symai/backend/mixin/google.py +5 -4
  41. symai/backend/mixin/groq.py +2 -4
  42. symai/backend/mixin/openai.py +132 -110
  43. symai/backend/settings.py +14 -14
  44. symai/chat.py +164 -94
  45. symai/collect/dynamic.py +13 -11
  46. symai/collect/pipeline.py +39 -31
  47. symai/collect/stats.py +109 -69
  48. symai/components.py +556 -238
  49. symai/constraints.py +14 -5
  50. symai/core.py +1495 -1210
  51. symai/core_ext.py +55 -50
  52. symai/endpoints/api.py +113 -58
  53. symai/extended/api_builder.py +22 -17
  54. symai/extended/arxiv_pdf_parser.py +13 -5
  55. symai/extended/bibtex_parser.py +8 -4
  56. symai/extended/conversation.py +88 -69
  57. symai/extended/document.py +40 -27
  58. symai/extended/file_merger.py +45 -7
  59. symai/extended/graph.py +38 -24
  60. symai/extended/html_style_template.py +17 -11
  61. symai/extended/interfaces/blip_2.py +1 -1
  62. symai/extended/interfaces/clip.py +4 -2
  63. symai/extended/interfaces/console.py +5 -3
  64. symai/extended/interfaces/dall_e.py +3 -1
  65. symai/extended/interfaces/file.py +2 -0
  66. symai/extended/interfaces/flux.py +3 -1
  67. symai/extended/interfaces/gpt_image.py +15 -6
  68. symai/extended/interfaces/input.py +2 -1
  69. symai/extended/interfaces/llava.py +1 -1
  70. symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +3 -2
  71. symai/extended/interfaces/naive_vectordb.py +2 -2
  72. symai/extended/interfaces/ocr.py +4 -2
  73. symai/extended/interfaces/openai_search.py +2 -0
  74. symai/extended/interfaces/parallel.py +30 -0
  75. symai/extended/interfaces/perplexity.py +2 -0
  76. symai/extended/interfaces/pinecone.py +6 -4
  77. symai/extended/interfaces/python.py +2 -0
  78. symai/extended/interfaces/serpapi.py +2 -0
  79. symai/extended/interfaces/terminal.py +0 -1
  80. symai/extended/interfaces/tts.py +2 -1
  81. symai/extended/interfaces/whisper.py +2 -1
  82. symai/extended/interfaces/wolframalpha.py +1 -0
  83. symai/extended/metrics/__init__.py +1 -1
  84. symai/extended/metrics/similarity.py +5 -2
  85. symai/extended/os_command.py +31 -22
  86. symai/extended/packages/symdev.py +39 -34
  87. symai/extended/packages/sympkg.py +30 -27
  88. symai/extended/packages/symrun.py +46 -35
  89. symai/extended/repo_cloner.py +10 -9
  90. symai/extended/seo_query_optimizer.py +15 -12
  91. symai/extended/solver.py +104 -76
  92. symai/extended/summarizer.py +8 -7
  93. symai/extended/taypan_interpreter.py +10 -9
  94. symai/extended/vectordb.py +28 -15
  95. symai/formatter/formatter.py +39 -31
  96. symai/formatter/regex.py +46 -44
  97. symai/functional.py +184 -86
  98. symai/imports.py +85 -51
  99. symai/interfaces.py +1 -1
  100. symai/memory.py +33 -24
  101. symai/menu/screen.py +28 -19
  102. symai/misc/console.py +27 -27
  103. symai/misc/loader.py +4 -3
  104. symai/models/base.py +147 -76
  105. symai/models/errors.py +1 -1
  106. symai/ops/__init__.py +1 -1
  107. symai/ops/measures.py +17 -14
  108. symai/ops/primitives.py +933 -635
  109. symai/post_processors.py +28 -24
  110. symai/pre_processors.py +58 -52
  111. symai/processor.py +15 -9
  112. symai/prompts.py +714 -649
  113. symai/server/huggingface_server.py +115 -32
  114. symai/server/llama_cpp_server.py +14 -6
  115. symai/server/qdrant_server.py +206 -0
  116. symai/shell.py +98 -39
  117. symai/shellsv.py +307 -223
  118. symai/strategy.py +135 -81
  119. symai/symbol.py +276 -225
  120. symai/utils.py +62 -46
  121. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +19 -9
  122. symbolicai-1.1.0.dist-info/RECORD +168 -0
  123. symbolicai-1.0.0.dist-info/RECORD +0 -163
  124. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
  125. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
  126. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
  127. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
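The largest addition in this list is a new Parallel-backed search engine, symai/backend/engines/search/engine_parallel.py (item 30), reproduced in full below. As a hedged sketch of how it would be enabled: the two configuration keys are the ones the engine itself reads, while the config file path and the write-it-yourself approach are assumptions about symbolicai's usual configuration flow, not something this diff shows.

# Hypothetical sketch (not part of this package): set the two keys the new
# ParallelEngine reads. The ~/.symai/symai.config.json path is an assumed default.
import json
from pathlib import Path

config_path = Path.home() / ".symai" / "symai.config.json"
config = json.loads(config_path.read_text()) if config_path.exists() else {}
config.update({
    "SEARCH_ENGINE_API_KEY": "<parallel-api-key>",  # consumed in ParallelEngine.__init__
    "SEARCH_ENGINE_MODEL": "parallel",              # lower-cased match in ParallelEngine.id()
})
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(config, indent=2))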
symai/backend/engines/search/engine_parallel.py (new file)
@@ -0,0 +1,665 @@
+ import json
+ import logging
+ import re
+ from copy import deepcopy
+ from dataclasses import dataclass
+ from typing import Any
+ from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
+
+ from ....symbol import Result
+ from ....utils import UserMessage
+ from ...base import Engine
+ from ...settings import SYMAI_CONFIG
+
+ logging.getLogger("requests").setLevel(logging.ERROR)
+ logging.getLogger("urllib3").setLevel(logging.ERROR)
+ logging.getLogger("httpx").setLevel(logging.ERROR)
+ logging.getLogger("httpcore").setLevel(logging.ERROR)
+
+ try:
+     from parallel import Parallel
+     from parallel.resources.task_run import build_task_spec_param
+
+     logging.getLogger("parallel").setLevel(logging.ERROR)
+ except ImportError as exc:
+     msg = (
+         "parallel-web SDK is not installed. Install with 'pip install parallel-web' "
+         "or add it to your environment."
+     )
+     UserMessage(msg)
+     raise RuntimeError(msg) from exc
+
+
+ TRACKING_KEYS = {
+     "utm_source",
+     "utm_medium",
+     "utm_campaign",
+     "utm_term",
+     "utm_content",
+ }
+
+
+ def _item_to_mapping(item: Any) -> dict[str, Any]:
+     if isinstance(item, dict):
+         return item
+     if hasattr(item, "model_dump"):
+         try:
+             return dict(item.model_dump())
+         except TypeError:
+             return dict(item.model_dump(mode="python"))
+     if hasattr(item, "dict"):
+         return dict(item.dict())
+     if hasattr(item, "__dict__"):
+         return deepcopy({k: v for k, v in item.__dict__.items() if not k.startswith("_")})
+     return {}
+
+
+ @dataclass
+ class Citation:
+     id: int
+     title: str
+     url: str
+     start: int
+     end: int
+
+     def __hash__(self):
+         return hash((self.url,))
+
+
+ class SearchResult(Result):
+     def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
+         super().__init__(value, **kwargs)
+         if isinstance(value, dict) and value.get("error"):
+             UserMessage(value["error"], raise_with=ValueError)
+         self._citations: list[Citation] = []
+         try:
+             results = self._coerce_results(value)
+             text, citations = self._build_text_and_citations(results)
+             self._value = text
+             self._citations = citations
+         except Exception as e:
+             self._value = None
+             UserMessage(f"Failed to parse Parallel search response: {e}", raise_with=ValueError)
+
+     def _coerce_results(self, raw: Any) -> list[dict[str, Any]]:
+         if raw is None:
+             return []
+         results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
+         if not results:
+             return []
+         coerced: list[dict[str, Any]] = []
+         for item in results:
+             if item is None:
+                 continue
+             coerced.append(_item_to_mapping(item))
+         return coerced
+
+     def _normalize_url(self, url: str) -> str:
+         parts = urlsplit(url)
+         scheme = parts.scheme.lower() if parts.scheme else "https"
+         netloc = parts.netloc.lower()
+         path = parts.path.rstrip("/") or "/"
+         filtered_query = [
+             (k, v)
+             for k, v in parse_qsl(parts.query, keep_blank_values=True)
+             if k not in TRACKING_KEYS and not k.lower().startswith("utm_")
+         ]
+         query = urlencode(filtered_query, doseq=True)
+         return urlunsplit((scheme, netloc, path, query, ""))
+
+     def _strip_markdown_links(self, text: str) -> str:
+         # Matches Markdown links like "[label](https://example.com "title")" and captures only the label.
+         pattern = re.compile(
+             r"\[(?P<label>[^\]]+)\]\((?P<url>https?://[^)\s]+)(?:\s+\"[^\"]*\")?\)"
+         )
+
+         def _replacement(match: re.Match) -> str:
+             label = match.group("label") or ""
+             return label.strip()
+
+         cleaned = pattern.sub(_replacement, text)
+         # Remove lingering empty parentheses that previously wrapped the stripped links.
+         cleaned = re.sub(r"\(\s*\)", "", cleaned)
+         # Remove parentheses that contain only commas or whitespace remnants.
+         return re.sub(r"\(\s*(,\s*)+\)", "", cleaned)
+
+     def _strip_square_brackets(self, text: str) -> str:
+         def _replacement(match: re.Match) -> str:
+             return match.group(1) or ""
+
+         # Replace bracketed fragments with their inner text so literal '[' or ']' do not leak into the output.
+         return re.sub(r"\[([^\]]*)\]", _replacement, text).replace("[", "").replace("]", "")
+
+     def _sanitize_excerpt(self, text: str) -> str:
+         cleaned = self._strip_markdown_links(text)
+         cleaned = self._strip_square_brackets(cleaned)
+         # Collapse consecutive spaces/tabs down to a single space for readability.
+         cleaned = re.sub(r"[ \t]{2,}", " ", cleaned)
+         # Shrink runs of three or more blank lines to a double newline spacer.
+         cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
+         return cleaned.strip()
+
+     def _build_text_and_citations(self, results: list[dict[str, Any]]):
+         pieces: list[str] = []
+         citations: list[Citation] = []
+         cursor = 0
+         seen_urls: set[str] = set()
+         cid = 1
+         separator = "\n\n---\n\n"
+
+         for item in results:
+             url = str(item.get("url") or "")
+             if not url:
+                 continue
+             normalized_url = self._normalize_url(url)
+             if normalized_url in seen_urls:
+                 continue
+             seen_urls.add(normalized_url)
+
+             title = str(item.get("title") or "") or urlsplit(normalized_url).netloc
+             excerpts = item.get("excerpts") or []
+             excerpt_parts: list[str] = []
+             for ex in excerpts:
+                 if not isinstance(ex, str):
+                     continue
+                 sanitized = self._sanitize_excerpt(ex)
+                 if sanitized:
+                     excerpt_parts.append(sanitized)
+             if not excerpt_parts:
+                 continue
+
+             combined_excerpt = "\n\n".join(excerpt_parts)
+             source_id = self._coerce_source_identifier(
+                 item, url=normalized_url, fallback=f"source-{cid}"
+             )
+             block_body = combined_excerpt
+             if source_id:
+                 block_body = f"{source_id}\n\n{combined_excerpt}"
+
+             if pieces:
+                 pieces.append(separator)
+                 cursor += len(separator)
+
+             opening_tag = "<source>\n"
+             pieces.append(opening_tag)
+             cursor += len(opening_tag)
+
+             pieces.append(block_body)
+             cursor += len(block_body)
+
+             closing_tag = "\n</source>"
+             pieces.append(closing_tag)
+             cursor += len(closing_tag)
+
+             marker = f"[{cid}]"
+             start = cursor
+             pieces.append(marker)
+             cursor += len(marker)
+
+             citations.append(
+                 Citation(id=cid, title=title, url=normalized_url, start=start, end=cursor)
+             )
+             cid += 1
+
+         text = "".join(pieces)
+         return text, citations
+
+     def _coerce_source_identifier(self, item: dict[str, Any], *, url: str, fallback: str) -> str:
+         for key in ("source_id", "sourceId", "sourceID", "id"):
+             candidate = self._sanitize_source_identifier(item.get(key))
+             if candidate:
+                 return candidate
+
+         split_url = urlsplit(url)
+         derived = split_url.netloc or split_url.path or url
+         candidate = self._sanitize_source_identifier(derived)
+         if candidate:
+             return candidate
+         return fallback
+
+     def _sanitize_source_identifier(self, raw: Any) -> str:
+         if raw is None:
+             return ""
+         text = str(raw).strip()
+         if not text:
+             return ""
+         # Replace any character outside [A-Za-z0-9._:-] with hyphens so IDs are safe for tag embedding.
+         sanitized = re.sub(r"[^A-Za-z0-9._:-]+", "-", text)
+         sanitized = sanitized.strip("-")
+         return sanitized or ""
+
+     def __str__(self) -> str:
+         if isinstance(self._value, str) and self._value:
+             return self._value
+         try:
+             return json.dumps(self.raw, indent=2)
+         except TypeError:
+             return str(self.raw)
+
+     def _repr_html_(self) -> str:
+         if isinstance(self._value, str) and self._value:
+             return f"<pre>{self._value}</pre>"
+         try:
+             return f"<pre>{json.dumps(self.raw, indent=2)}</pre>"
+         except Exception:
+             return f"<pre>{self.raw!s}</pre>"
+
+     def get_citations(self) -> list[Citation]:
+         return self._citations
+
+
+ class ExtractResult(Result):
+     """Result wrapper for Parallel Extract API responses."""
+
+     def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
+         super().__init__(value, **kwargs)
+         try:
+             results = self._coerce_results(value)
+             content_parts: list[str] = []
+             for r in results:
+                 excerpts = r.get("excerpts") or []
+                 full = r.get("full_content")
+                 if isinstance(full, str):
+                     content_parts.append(full)
+                 elif full is not None:
+                     content_parts.append(str(full))
+                 elif excerpts:
+                     content_parts.extend([s for s in excerpts if isinstance(s, str)])
+             self._value = "\n\n".join(content_parts)
+         except Exception as e:
+             self._value = None
+             UserMessage(f"Failed to parse Parallel extract response: {e}", raise_with=ValueError)
+
+     def _coerce_results(self, raw: Any) -> list[dict[str, Any]]:
+         if raw is None:
+             return []
+         results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
+         if not results:
+             return []
+         coerced: list[dict[str, Any]] = []
+         for item in results:
+             if item is None:
+                 continue
+             coerced.append(_item_to_mapping(item))
+         return coerced
+
+     def __str__(self) -> str:
+         try:
+             return str(self._value or "")
+         except Exception:
+             return ""
+
+     def _repr_html_(self) -> str:
+         try:
+             return f"<pre>{self._value or ''}</pre>"
+         except Exception:
+             return "<pre></pre>"
+
+
+ class ParallelEngine(Engine):
+     MAX_INCLUDE_DOMAINS = 10
+
+     def __init__(self, api_key: str | None = None):
+         super().__init__()
+         self.config = deepcopy(SYMAI_CONFIG)
+         self.api_key = api_key or self.config.get("SEARCH_ENGINE_API_KEY")
+         self.model = self.config.get("SEARCH_ENGINE_MODEL")
+         self.name = self.__class__.__name__
+
+         try:
+             self.client = Parallel(api_key=self.api_key)
+         except Exception as e:
+             UserMessage(f"Failed to initialize Parallel client: {e}", raise_with=ValueError)
+
+     def id(self) -> str:
+         # Register as a search engine when configured with the 'parallel' model token
+         if (
+             self.config.get("SEARCH_ENGINE_API_KEY")
+             and str(self.config.get("SEARCH_ENGINE_MODEL", "")).lower() == "parallel"
+         ):
+             return "search"
+         return super().id()
+
+     def command(self, *args, **kwargs):
+         super().command(*args, **kwargs)
+         if "SEARCH_ENGINE_API_KEY" in kwargs:
+             self.api_key = kwargs["SEARCH_ENGINE_API_KEY"]
+         if "SEARCH_ENGINE_MODEL" in kwargs:
+             self.model = kwargs["SEARCH_ENGINE_MODEL"]
+
+     def _extract_netloc(self, raw: str | None) -> str | None:
+         if not isinstance(raw, str):
+             return None
+         s = raw.strip()
+         if not s:
+             return None
+         parts = urlsplit(s if "://" in s else f"//{s}")
+         netloc = parts.netloc or parts.path
+         netloc = netloc.split("@", 1)[-1]
+         netloc = netloc.split(":", 1)[0]
+         netloc = netloc.strip(".").strip().lower()
+         return netloc or None
+
+     def _normalize_include_domains(self, domains: list[str] | None) -> list[str]:
+         if not isinstance(domains, list):
+             return []
+         seen: set[str] = set()
+         out: list[str] = []
+         for d in domains:
+             netloc = self._extract_netloc(d)
+             if not netloc or netloc in seen:
+                 continue
+             if not self._is_valid_domain(netloc):
+                 # Skip strings that are not apex domains or bare TLD patterns
+                 continue
+             seen.add(netloc)
+             out.append(netloc)
+             if len(out) >= self.MAX_INCLUDE_DOMAINS:
+                 break
+         return out
+
+     def _coerce_search_queries(self, value: Any) -> list[str]:
+         if value is None:
+             return []
+         if isinstance(value, str):
+             text = value.strip()
+             return [text] if text else []
+         if isinstance(value, list):
+             cleaned: list[str] = []
+             for item in value:
+                 if item is None:
+                     continue
+                 text = str(item).strip()
+                 if text:
+                     cleaned.append(text)
+             return cleaned
+         text = str(value).strip()
+         return [text] if text else []
+
+     def _is_valid_domain(self, s: str) -> bool:
+         """Validate apex domains or bare extension filters.
+
+         Accepts:
+         - Apex/sub domains like "example.com", "www.arstechnica.com"
+         - Bare extension patterns like ".gov", ".co.uk"
+         Rejects:
+         - Values without a dot (e.g., "tomshardware")
+         - Schemes, paths, or ports (filtered earlier by _extract_netloc)
+         """
+         if not s:
+             return False
+         if s.startswith("."):
+             # Allow bare domain extensions like .gov or .co.uk
+             remainder = s[1:]
+             return bool((remainder and "." in remainder) or remainder.isalpha())
+         # Require at least one dot and valid label characters
+         # Matches a single DNS label: 1-63 chars, alphanumeric at both ends, hyphens allowed internally.
+         label_re = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?$")
+         parts = s.split(".")
+         if len(parts) < 2:
+             return False
+         return all(label_re.fullmatch(p or "") for p in parts)
+
+     def _search(self, queries: list[str], kwargs: dict[str, Any]):
+         if not queries:
+             UserMessage(
+                 "ParallelEngine._search requires at least one query.", raise_with=ValueError
+             )
+
+         mode = kwargs.get("mode") or "one-shot"
+         max_results = kwargs.get("max_results", 10)
+         max_chars_per_result = kwargs.get("max_chars_per_result", 15000)
+         excerpts = {"max_chars_per_result": max_chars_per_result}
+         include = self._normalize_include_domains(kwargs.get("allowed_domains"))
+         source_policy = {"include_domains": include} if include else None
+         objective = kwargs.get("objective")
+
+         try:
+             result = self.client.beta.search(
+                 objective=objective,
+                 search_queries=queries,
+                 max_results=max_results,
+                 excerpts=excerpts,
+                 mode=mode,
+                 source_policy=source_policy,
+             )
+         except Exception as e:
+             UserMessage(f"Failed to call Parallel Search API: {e}", raise_with=ValueError)
+         return [SearchResult(result)], {"raw_output": result}
+
+     def _task(self, queries: list[str], kwargs: dict[str, Any]):
+         processor_name = self._coerce_processor(kwargs.get("processor"))
+         task_input = self._compose_task_input(queries)
+
+         include = self._normalize_include_domains(kwargs.get("allowed_domains"))
+         source_policy = {"include_domains": include} if include else None
+         metadata = self._coerce_metadata(kwargs.get("metadata"))
+
+         output_schema = (
+             kwargs.get("task_output_schema")
+             or kwargs.get("task_output")
+             or kwargs.get("output_schema")
+             or kwargs.get("output")
+         )
+         task_spec_param = self._build_task_spec(output_schema, task_input)
+         timeout, api_timeout = self._collect_task_timeouts(kwargs)
+
+         run = self._create_task_run(
+             task_input=task_input,
+             processor=processor_name,
+             metadata=metadata,
+             source_policy=source_policy,
+             task_spec=task_spec_param,
+         )
+         result = self._fetch_task_result(run.run_id, timeout=timeout, api_timeout=api_timeout)
+
+         payload = self._task_result_to_search_payload(result)
+         return [SearchResult(payload)], {
+             "raw_output": result,
+             "task_output": payload.get("task_output"),
+             "task_output_type": payload.get("task_output_type"),
+         }
+
+     def _coerce_processor(self, processor: Any) -> str:
+         if processor is None:
+             UserMessage("ParallelEngine.task requires a processor.", raise_with=ValueError)
+         value = processor.strip() if isinstance(processor, str) else str(processor).strip()
+         if not value:
+             UserMessage(
+                 "ParallelEngine.task requires a non-empty processor.", raise_with=ValueError
+             )
+         return value
+
+     def _compose_task_input(self, queries: list[str]) -> str:
+         if not queries:
+             UserMessage(
+                 "ParallelEngine.task requires at least one query input.", raise_with=ValueError
+             )
+         if len(queries) == 1:
+             return queries[0]
+         return "\n\n".join(f"{idx}. {q}" for idx, q in enumerate(queries, start=1))
+
+     def _coerce_metadata(self, metadata: Any) -> dict[str, Any] | None:
+         if metadata is None or isinstance(metadata, dict):
+             return metadata
+         return None
+
+     def _build_task_spec(self, output_schema: Any, task_input: str):
+         if output_schema is None:
+             return None
+         try:
+             return build_task_spec_param(output_schema, task_input)
+         except Exception as exc:
+             UserMessage(f"Invalid task output schema: {exc}", raise_with=ValueError)
+
+     def _collect_task_timeouts(self, kwargs: dict[str, Any]) -> tuple[Any, int | None]:
+         timeout = kwargs.get("task_timeout") or kwargs.get("timeout")
+         api_timeout = kwargs.get("task_api_timeout") or kwargs.get("api_timeout")
+         if api_timeout is None:
+             return timeout, None
+         try:
+             return timeout, int(api_timeout)
+         except (TypeError, ValueError) as exc:
+             UserMessage(f"api_timeout must be numeric: {exc}", raise_with=ValueError)
+
+     def _create_task_run(
+         self,
+         *,
+         task_input: str,
+         processor: str,
+         metadata: dict[str, Any] | None,
+         source_policy: dict[str, Any] | None,
+         task_spec: Any,
+     ):
+         task_kwargs: dict[str, Any] = {
+             "input": task_input,
+             "processor": processor,
+         }
+         if metadata is not None:
+             task_kwargs["metadata"] = metadata
+         if source_policy is not None:
+             task_kwargs["source_policy"] = source_policy
+         if task_spec is not None:
+             task_kwargs["task_spec"] = task_spec
+
+         try:
+             return self.client.task_run.create(**task_kwargs)
+         except Exception as e:
+             UserMessage(f"Failed to create Parallel task: {e}", raise_with=ValueError)
+
+     def _fetch_task_result(self, run_id: str, *, timeout: Any, api_timeout: int | None):
+         result_kwargs: dict[str, Any] = {}
+         if api_timeout is not None:
+             result_kwargs["api_timeout"] = api_timeout
+         if timeout is not None:
+             result_kwargs["timeout"] = timeout
+         try:
+             return self.client.task_run.result(run_id, **result_kwargs)
+         except Exception as e:
+             UserMessage(f"Failed to fetch Parallel task result: {e}", raise_with=ValueError)
+
+     def _task_result_to_search_payload(self, task_result: Any) -> dict[str, Any]:
+         payload: dict[str, Any] = {"results": []}
+         output = getattr(task_result, "output", None)
+         if output is None:
+             return payload
+
+         basis_items = getattr(output, "basis", None) or []
+         for idx, basis in enumerate(basis_items):
+             payload["results"].extend(self._basis_to_results(basis, basis_index=idx))
+
+         if not payload["results"]:
+             payload["results"].append(self._task_fallback_result(output, basis_items))
+
+         payload["task_output"] = getattr(output, "content", None)
+         payload["task_output_type"] = getattr(output, "type", None)
+         return payload
+
+     def _basis_to_results(self, basis: Any, *, basis_index: int) -> list[dict[str, Any]]:
+         raw_reasoning = getattr(basis, "reasoning", "") or ""
+         reasoning = raw_reasoning if isinstance(raw_reasoning, str) else str(raw_reasoning)
+         raw_field = getattr(basis, "field", "") or ""
+         field_title = raw_field if isinstance(raw_field, str) else str(raw_field)
+         if not field_title.strip():
+             field_title = "Parallel Task Output"
+         citations = getattr(basis, "citations", None) or []
+         if not citations:
+             if not reasoning:
+                 return []
+             citations = [None]
+
+         results: list[dict[str, Any]] = []
+         # Convert field titles to lowercase slugs by swapping non-alphanumerics for hyphens.
+         slug = re.sub(r"[^a-z0-9]+", "-", field_title.lower()).strip("-") or "field"
+         basis_url = f"parallel://task-output/{basis_index:04d}-{slug}"
+         for citation in citations:
+             if citation is None:
+                 url = basis_url
+                 title = field_title
+                 excerpts = [reasoning]
+             else:
+                 url = str(getattr(citation, "url", "") or "")
+                 title = str(getattr(citation, "title", "") or field_title)
+                 raw_excerpts = getattr(citation, "excerpts", None) or []
+                 excerpts = [snippet for snippet in raw_excerpts if isinstance(snippet, str)]
+                 if not excerpts and reasoning:
+                     excerpts = [reasoning]
+             results.append(
+                 {
+                     "url": url or basis_url,
+                     "title": title or field_title,
+                     "excerpts": excerpts or ([reasoning] if reasoning else []),
+                 }
+             )
+         return results
+
+     def _task_fallback_result(self, output: Any, basis_items: list[Any]) -> dict[str, Any]:
+         content = getattr(output, "content", None)
+         if isinstance(content, str):
+             snippet = content
+         elif isinstance(content, (dict, list)):
+             snippet = json.dumps(content, ensure_ascii=False)
+         else:
+             snippet = str(content or "")
+         if not snippet:
+             extra_reasoning: list[str] = []
+             for basis in basis_items:
+                 raw_value = getattr(basis, "reasoning", "") or ""
+                 if isinstance(raw_value, str):
+                     extra_reasoning.append(raw_value)
+                 else:
+                     extra_reasoning.append(str(raw_value))
+             snippet = " ".join(r for r in extra_reasoning if r) or "Parallel task output"
+         return {
+             "url": "parallel://task-output",
+             "title": "Parallel Task Output",
+             "excerpts": [snippet],
+         }
+
+     def _extract(self, url: str, kwargs: dict[str, Any]):
+         excerpts = kwargs.get("excerpts", True)
+         full_content = kwargs.get("full_content", False)
+         objective = kwargs.get("objective")
+         try:
+             result = self.client.beta.extract(
+                 urls=[url],
+                 objective=objective,
+                 excerpts=excerpts,
+                 full_content=full_content,
+             )
+         except Exception as e:
+             UserMessage(f"Failed to call Parallel Extract API: {e}", raise_with=ValueError)
+         return [ExtractResult(result)], {"raw_output": result, "final_url": url}
+
+     def forward(self, argument):
+         kwargs = argument.kwargs
+         # Route based on presence of URL vs Query
+         url = getattr(argument.prop, "url", None) or kwargs.get("url")
+         if url:
+             return self._extract(str(url), kwargs)
+
+         raw_query = getattr(argument.prop, "prepared_input", None)
+         if raw_query is None:
+             raw_query = getattr(argument.prop, "query", None)
+         search_queries = self._coerce_search_queries(raw_query)
+         if not search_queries:
+             UserMessage(
+                 "ParallelEngine.forward requires at least one non-empty query or url.",
+                 raise_with=ValueError,
+             )
+         processor = kwargs.get("processor")
+         if processor is not None:
+             return self._task(search_queries, kwargs)
+         return self._search(search_queries, kwargs)
+
+     def prepare(self, argument):
+         # For scraping: store URL directly. For search: pass through query string.
+         url = argument.kwargs.get("url") or getattr(argument.prop, "url", None)
+         if url:
+             argument.prop.prepared_input = str(url)
+             return
+         query = getattr(argument.prop, "query", None)
+         if isinstance(query, list):
+             argument.prop.prepared_input = self._coerce_search_queries(query)
+             return
+         argument.prop.prepared_input = str(query or "").strip()
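For orientation, a minimal usage sketch of the engine above. The Interface factory and the "parallel" interface name are assumptions based on the new symai/extended/interfaces/parallel.py listed in this release, and keyword forwarding to the engine is assumed to follow the other search interfaces; str(res) and get_citations() come directly from SearchResult in the diff.

# Hypothetical usage sketch (not part of this diff).
from symai.interfaces import Interface

search = Interface("parallel")
res = search(
    "state of neurosymbolic programming frameworks",
    max_results=5,                    # read by ParallelEngine._search
    allowed_domains=["arxiv.org"],    # filtered by _normalize_include_domains
)
print(str(res))                       # <source> blocks joined by "---" separators
for c in res.get_citations():         # Citation(id, title, url, start, end)
    print(f"[{c.id}] {c.title} -> {c.url}")

Per forward() above, passing url= routes the call to the Extract API and yields an ExtractResult instead, while supplying processor= switches from the Search API to a Task run.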