symbolicai-1.2.1-py3-none-any.whl → symbolicai-1.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
symai/__init__.py CHANGED
@@ -33,7 +33,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
  # Create singleton instance
  config_manager = settings.SymAIConfig()
 
- SYMAI_VERSION = "1.2.1"
+ SYMAI_VERSION = "1.4.0"
  __version__ = SYMAI_VERSION
  __root_dir__ = config_manager.config_dir
 
symai/backend/engines/index/engine_qdrant.py CHANGED
@@ -4,8 +4,10 @@ import tempfile
  import urllib.request
  import uuid
  import warnings
+ from dataclasses import dataclass
  from pathlib import Path
  from typing import Any
+ from urllib.parse import urlparse
 
  import numpy as np
 
@@ -148,6 +150,108 @@ Matches:
      return f"<ul>{doc_str}</ul>"
 
 
+ @dataclass
+ class Citation:
+     id: int
+     title: str
+     url: str
+     start: int
+     end: int
+
+     def __hash__(self):
+         return hash((self.url,))
+
+
+ class SearchResult(Result):
+     def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
+         super().__init__(value, **kwargs)
+         if isinstance(value, dict) and value.get("error"):
+             UserMessage(value["error"], raise_with=ValueError)
+         results = self._coerce_results(value)
+         text, citations = self._build_text_and_citations(results)
+         self._value = text
+         self._citations = citations
+
+     def _coerce_results(self, raw: Any) -> list[dict[str, Any]]:
+         if raw is None:
+             return []
+         results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
+         if not results:
+             return []
+         return [item for item in results if isinstance(item, dict)]
+
+     def _source_identifier(self, item: dict[str, Any], url: str) -> str:
+         for key in ("source_id", "sourceId", "sourceID", "id"):
+             raw = item.get(key)
+             if raw is None:
+                 continue
+             text = str(raw).strip()
+             if text:
+                 return text
+         path = Path(urlparse(url).path)
+         return path.name or path.as_posix() or url
+
+     def _build_text_and_citations(self, results: list[dict[str, Any]]):
+         pieces = []
+         citations = []
+         cursor = 0
+         cid = 1
+         separator = "\n\n---\n\n"
+
+         for item in results:
+             url = str(item.get("url") or "")
+             if not url:
+                 continue
+
+             title = str(item.get("title") or "")
+             if not title:
+                 path = Path(urlparse(url).path)
+                 title = path.name or url
+
+             excerpts = item.get("excerpts") or []
+             excerpt_parts = [ex.strip() for ex in excerpts if isinstance(ex, str) and ex.strip()]
+             if not excerpt_parts:
+                 continue
+
+             combined_excerpt = "\n\n".join(excerpt_parts)
+             source_id = self._source_identifier(item, url)
+             block_body = combined_excerpt if not source_id else f"{source_id}\n\n{combined_excerpt}"
+
+             if pieces:
+                 pieces.append(separator)
+                 cursor += len(separator)
+
+             opening_tag = "<source>\n"
+             pieces.append(opening_tag)
+             cursor += len(opening_tag)
+
+             pieces.append(block_body)
+             cursor += len(block_body)
+
+             closing_tag = "\n</source>"
+             pieces.append(closing_tag)
+             cursor += len(closing_tag)
+
+             marker = f"[{cid}]"
+             start = cursor
+             pieces.append(marker)
+             cursor += len(marker)
+
+             citations.append(Citation(id=cid, title=title or url, url=url, start=start, end=cursor))
+             cid += 1
+
+         return "".join(pieces), citations
+
+     def __str__(self) -> str:
+         return str(self._value or "")
+
+     def _repr_html_(self) -> str:
+         return f"<pre>{self._value or ''}</pre>"
+
+     def get_citations(self) -> list[Citation]:
+         return self._citations
+
+
  class QdrantIndexEngine(Engine):
      _default_url = "http://localhost:6333"
      _default_api_key = SYMAI_CONFIG.get("INDEXING_ENGINE_API_KEY", None)
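
For intuition, here is a minimal standalone sketch (not the engine code itself) of the offset bookkeeping `_build_text_and_citations` performs: each `[n]` marker is appended right after its `<source>` block, and the recorded `start`/`end` index back into the final string. The result dicts below are made up.

```python
# Standalone sketch of the marker/offset scheme; the results below are hypothetical.
results = [
    {"url": "https://example.com/a", "excerpts": ["alpha"]},
    {"url": "https://example.com/b", "excerpts": ["beta"]},
]

pieces, citations, cursor = [], [], 0
separator = "\n\n---\n\n"
for cid, item in enumerate(results, start=1):
    if pieces:
        pieces.append(separator)
        cursor += len(separator)
    block = f"<source>\n{item['excerpts'][0]}\n</source>"
    pieces.append(block)
    cursor += len(block)
    marker = f"[{cid}]"
    start = cursor
    pieces.append(marker)
    cursor += len(marker)
    citations.append((cid, item["url"], start, cursor))

text = "".join(pieces)
for cid, url, start, end in citations:
    assert text[start:end] == f"[{cid}]"  # each citation indexes back into the text
```
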
@@ -421,15 +525,18 @@ class QdrantIndexEngine(Engine):
          kwargs["index_get"] = True
          self._configure_collection(**kwargs)
 
+         treat_as_search_engine = False
          if operation == "search":
              # Ensure collection exists - fail fast if it doesn't
              self._ensure_collection_exists(collection_name)
-             index_top_k = kwargs.get("index_top_k", self.index_top_k)
+             search_kwargs = dict(kwargs)
+             index_top_k = search_kwargs.pop("index_top_k", self.index_top_k)
              # Optional search parameters
-             score_threshold = kwargs.get("score_threshold")
+             score_threshold = search_kwargs.pop("score_threshold", None)
              # Accept both `query_filter` and `filter` for convenience
-             raw_filter = kwargs.get("query_filter", kwargs.get("filter"))
+             raw_filter = search_kwargs.pop("query_filter", search_kwargs.pop("filter", None))
              query_filter = self._build_query_filter(raw_filter)
+             treat_as_search_engine = bool(search_kwargs.pop("treat_as_search_engine", False))
 
              # Use shared search helper that already handles retries and normalization
              rsp = self._search_sync(
@@ -438,6 +545,7 @@ class QdrantIndexEngine(Engine):
                  limit=index_top_k,
                  score_threshold=score_threshold,
                  query_filter=query_filter,
+                 **search_kwargs,
              )
          elif operation == "add":
              # Create collection if it doesn't exist (only for write operations)
@@ -462,7 +570,10 @@ class QdrantIndexEngine(Engine):
 
          metadata = {}
 
-         rsp = QdrantResult(rsp, query, embedding)
+         if operation == "search" and treat_as_search_engine:
+             rsp = self._format_search_results(rsp, collection_name)
+         else:
+             rsp = QdrantResult(rsp, query, embedding)
          return [rsp], metadata
 
      def prepare(self, argument):
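
One subtlety in the `raw_filter` line above: Python evaluates default arguments eagerly, so the inner `pop("filter", None)` runs even when `query_filter` is present; both aliases are always consumed from `search_kwargs` before the remainder is forwarded. A quick standalone check:

```python
# Both alias keys are consumed in one expression, regardless of which one "wins".
search_kwargs = {"query_filter": {"must": []}, "filter": {"should": []}}
raw_filter = search_kwargs.pop("query_filter", search_kwargs.pop("filter", None))
print(raw_filter)     # {'must': []}  (query_filter takes precedence)
print(search_kwargs)  # {}            (the 'filter' alias was popped too)
```
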
@@ -513,7 +624,33 @@ class QdrantIndexEngine(Engine):
              jitter=self.jitter,
          )
          def _func():
+             qdrant_kwargs = dict(kwargs)
              query_vector_normalized = self._normalize_vector(query_vector)
+             with_payload = qdrant_kwargs.pop("with_payload", True)
+             with_vectors = qdrant_kwargs.pop("with_vectors", self.index_values)
+             # qdrant-client `query_points` is strict about extra kwargs and will assert if any
+             # unknown arguments are provided. Because our engine `forward()` passes decorator
+             # kwargs through the stack, we must drop engine-internal fields here.
+             #
+             # Keep only kwargs that `qdrant_client.QdrantClient.query_points` accepts (besides
+             # those we pass explicitly).
+             if "filter" in qdrant_kwargs and "query_filter" not in qdrant_kwargs:
+                 # Convenience alias supported by our public API
+                 qdrant_kwargs["query_filter"] = qdrant_kwargs.pop("filter")
+
+             allowed_qdrant_kwargs = {
+                 "using",
+                 "prefetch",
+                 "query_filter",
+                 "search_params",
+                 "offset",
+                 "score_threshold",
+                 "lookup_from",
+                 "consistency",
+                 "shard_key_selector",
+                 "timeout",
+             }
+             qdrant_kwargs = {k: v for k, v in qdrant_kwargs.items() if k in allowed_qdrant_kwargs}
              # For single vector collections, pass vector directly to query parameter
              # For named vector collections, use Query(near_vector=NamedVector(name="vector_name", vector=...))
              # query_points API uses query_filter (not filter) for filtering
@@ -521,9 +658,9 @@ class QdrantIndexEngine(Engine):
                  collection_name=collection_name,
                  query=query_vector_normalized,
                  limit=top_k,
-                 with_payload=True,
-                 with_vectors=self.index_values,
-                 **kwargs,
+                 with_payload=with_payload,
+                 with_vectors=with_vectors,
+                 **qdrant_kwargs,
              )
              # query_points returns QueryResponse with .points attribute, extract it
              return response.points
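
With the allowlist in place, callers can pass `query_points` options through the engine stack without tripping qdrant-client's strict kwarg validation, while anything engine-internal is silently dropped before the call. A standalone sketch of the filtering; the incoming kwargs are illustrative:

```python
# Illustrative kwargs as they might arrive from the decorator pipeline.
kwargs = {
    "offset": 10,             # allowed: forwarded to query_points
    "score_threshold": 0.35,  # allowed: forwarded to query_points
    "preview": True,          # engine-internal: dropped by the allowlist
}
allowed = {"using", "prefetch", "query_filter", "search_params", "offset",
           "score_threshold", "lookup_from", "consistency", "shard_key_selector", "timeout"}
forwarded = {k: v for k, v in kwargs.items() if k in allowed}
print(forwarded)  # {'offset': 10, 'score_threshold': 0.35}
```
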
@@ -860,6 +997,82 @@ class QdrantIndexEngine(Engine):
          # Use _query which handles retry logic and vector normalization
          return self._query(collection_name, query_vector, limit, **search_kwargs)
 
+     def _resolve_payload_url(
+         self, payload: dict[str, Any], collection_name: str, point_id: Any
+     ) -> str:
+         source = (
+             payload.get("source")
+             or payload.get("url")
+             or payload.get("file_path")
+             or payload.get("path")
+         )
+         if isinstance(source, str) and source:
+             if source.startswith(("http://", "https://", "file://")):
+                 return source
+
+             source_path = Path(source).expanduser()
+             try:
+                 resolved = source_path.resolve()
+                 if resolved.exists() or source_path.is_absolute():
+                     return resolved.as_uri()
+             except Exception:
+                 return str(source_path)
+             return str(source_path)
+
+         return f"qdrant://{collection_name}/{point_id}"
+
+     def _resolve_payload_title(self, payload: dict[str, Any], url: str, page: Any) -> str:
+         raw_title = payload.get("title")
+         if isinstance(raw_title, str) and raw_title.strip():
+             base = raw_title.strip()
+         else:
+             parsed = urlparse(url)
+             path_part = parsed.path or url
+             base = Path(path_part).stem or url
+
+         try:
+             page_int = int(page) if page is not None else None
+         except (TypeError, ValueError):
+             page_int = None
+
+         if Path(urlparse(url).path).suffix.lower() == ".pdf" and page_int is not None:
+             base = f"{base}#p{page_int}"
+
+         return base
+
+     def _format_search_results(self, points: list[ScoredPoint] | None, collection_name: str):
+         results: list[dict[str, Any]] = []
+
+         for point in points or []:
+             payload = getattr(point, "payload", {}) or {}
+             text = payload.get("text") or payload.get("content")
+             if isinstance(text, list):
+                 text = " ".join([t for t in text if isinstance(t, str)])
+             if not isinstance(text, str):
+                 continue
+             excerpt = text.strip()
+             if not excerpt:
+                 continue
+
+             page = payload.get("page") or payload.get("page_number") or payload.get("pageIndex")
+             url = self._resolve_payload_url(payload, collection_name, getattr(point, "id", ""))
+             title = self._resolve_payload_title(payload, url, page)
+
+             results.append(
+                 {
+                     "url": url,
+                     "title": title,
+                     "excerpts": [excerpt],
+                     "source_id": payload.get("source_id")
+                     or payload.get("sourceId")
+                     or payload.get("chunk_id")
+                     or payload.get("chunkId")
+                     or getattr(point, "id", None),
+                 }
+             )
+
+         return SearchResult({"results": results})
+
      async def search(
          self,
          collection_name: str,
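
A standalone sketch of how these helpers turn a stored payload into a citation URL and title; the payload is hypothetical, and the behavior mirrors the rules above (absolute local paths become `file://` URIs, PDF titles get a page anchor):

```python
from pathlib import Path
from urllib.parse import urlparse

# Hypothetical payload, mirroring fields `_format_search_results` reads.
payload = {"text": "Qdrant stores vectors alongside JSON payloads.",
           "source": "/home/user/docs/qdrant_notes.pdf", "page": 3}

# URL resolution (as in _resolve_payload_url): absolute local paths -> file:// URIs.
source = Path(payload["source"]).expanduser()
url = source.resolve().as_uri() if source.is_absolute() else str(source)

# Title resolution (as in _resolve_payload_title): path stem, plus a page anchor for PDFs.
title = Path(urlparse(url).path).stem
if Path(urlparse(url).path).suffix.lower() == ".pdf" and payload.get("page") is not None:
    title = f"{title}#p{int(payload['page'])}"

print(url)    # file:///home/user/docs/qdrant_notes.pdf
print(title)  # qdrant_notes#p3
```
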
@@ -923,7 +1136,7 @@ class QdrantIndexEngine(Engine):
          if tmp_path.exists():
              tmp_path.unlink()
 
-     async def chunk_and_upsert(  # noqa: C901
+     async def chunk_and_upsert(
          self,
          collection_name: str,
          text: str | Symbol | None = None,
@@ -1001,8 +1214,7 @@ class QdrantIndexEngine(Engine):
              # Add source to metadata if not already present
              if metadata is None:
                  metadata = {}
-             if "source" not in metadata:
-                 metadata["source"] = doc_path.name
+             metadata["source"] = str(doc_path.resolve())
 
          # Handle document_url: download and read file using FileReader
          elif document_url is not None:
symai/backend/engines/search/engine_parallel.py CHANGED
@@ -74,7 +74,8 @@ class SearchResult(Result):
          self._citations: list[Citation] = []
          try:
              results = self._coerce_results(value)
-             text, citations = self._build_text_and_citations(results)
+             task_meta = self._extract_task_metadata(value)
+             text, citations = self._build_text_and_citations(results, task_meta=task_meta)
              self._value = text
              self._citations = citations
          except Exception as e:
@@ -87,13 +88,26 @@ class SearchResult(Result):
          results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
          if not results:
              return []
-         coerced: list[dict[str, Any]] = []
+         coerced = []
          for item in results:
              if item is None:
                  continue
              coerced.append(_item_to_mapping(item))
          return coerced
 
+     def _extract_task_metadata(self, raw: Any) -> dict[str, Any] | None:
+         if not isinstance(raw, dict):
+             return None
+         task_output = raw.get("task_output")
+         if task_output is None:
+             return None
+         output_value = task_output.get("output") if isinstance(task_output, dict) else None
+         return {
+             "reasoning": raw.get("task_reasoning"),
+             "answer": output_value,
+             "confidence": raw.get("task_confidence"),
+         }
+
      def _normalize_url(self, url: str) -> str:
          parts = urlsplit(url)
          scheme = parts.scheme.lower() if parts.scheme else "https"
@@ -139,11 +153,40 @@ class SearchResult(Result):
          cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
          return cleaned.strip()
 
-     def _build_text_and_citations(self, results: list[dict[str, Any]]):
-         pieces: list[str] = []
-         citations: list[Citation] = []
+     def _build_text_and_citations(
+         self, results: list[dict[str, Any]], *, task_meta: dict[str, Any] | None = None
+     ):
+         pieces = []
+         citations = []
          cursor = 0
-         seen_urls: set[str] = set()
+
+         if task_meta:
+             reasoning = task_meta.get("reasoning")
+             answer = task_meta.get("answer")
+             confidence = task_meta.get("confidence")
+
+             if reasoning:
+                 block = f"<reasoning>\n{reasoning}\n</reasoning>"
+                 pieces.append(block)
+                 cursor += len(block)
+
+             if answer:
+                 if pieces:
+                     pieces.append("\n\n")
+                     cursor += 2
+                 block = f"<answer>\n{answer}\n</answer>"
+                 pieces.append(block)
+                 cursor += len(block)
+
+             if confidence:
+                 if pieces:
+                     pieces.append("\n\n")
+                     cursor += 2
+                 block = f"<answer_confidence>\n{confidence}\n</answer_confidence>"
+                 pieces.append(block)
+                 cursor += len(block)
+
+         seen_urls = set()
          cid = 1
          separator = "\n\n---\n\n"
 
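
To see the layout this produces, here is a minimal standalone sketch with made-up task metadata; `cursor` keeps counting alongside `pieces`, so the citation markers appended afterwards still carry correct offsets:

```python
# Standalone sketch with hypothetical task metadata; mirrors the preamble logic above.
task_meta = {"reasoning": "Compared both sources.", "answer": "42", "confidence": "high"}

pieces, cursor = [], 0
for tag, key in (("reasoning", "reasoning"), ("answer", "answer"),
                 ("answer_confidence", "confidence")):
    value = task_meta.get(key)
    if not value:
        continue
    if pieces:
        pieces.append("\n\n")
        cursor += 2
    block = f"<{tag}>\n{value}\n</{tag}>"
    pieces.append(block)
    cursor += len(block)

text = "".join(pieces)
print(text)            # <reasoning>...</reasoning>, then <answer>, then <answer_confidence>
assert cursor == len(text)  # offsets stay in sync for the [n] markers added later
```
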
@@ -158,13 +201,8 @@ class SearchResult(Result):
 
              title = str(item.get("title") or "") or urlsplit(normalized_url).netloc
              excerpts = item.get("excerpts") or []
-             excerpt_parts: list[str] = []
-             for ex in excerpts:
-                 if not isinstance(ex, str):
-                     continue
-                 sanitized = self._sanitize_excerpt(ex)
-                 if sanitized:
-                     excerpt_parts.append(sanitized)
+             excerpt_parts = [self._sanitize_excerpt(ex) for ex in excerpts]
+             excerpt_parts = [p for p in excerpt_parts if p]
              if not excerpt_parts:
                  continue
 
@@ -255,16 +293,14 @@ class ExtractResult(Result):
          super().__init__(value, **kwargs)
          try:
              results = self._coerce_results(value)
-             content_parts: list[str] = []
+             content_parts = []
              for r in results:
-                 excerpts = r.get("excerpts") or []
                  full = r.get("full_content")
-                 if isinstance(full, str):
+                 if full is not None:
                      content_parts.append(full)
-                 elif full is not None:
-                     content_parts.append(str(full))
-                 elif excerpts:
-                     content_parts.extend([s for s in excerpts if isinstance(s, str)])
+                 else:
+                     excerpts = r.get("excerpts") or []
+                     content_parts.extend(excerpts)
              self._value = "\n\n".join(content_parts)
          except Exception as e:
              self._value = None
@@ -276,7 +312,7 @@ class ExtractResult(Result):
          results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
          if not results:
              return []
-         coerced: list[dict[str, Any]] = []
+         coerced = []
          for item in results:
              if item is None:
                  continue
@@ -344,8 +380,8 @@ class ParallelEngine(Engine):
      def _normalize_include_domains(self, domains: list[str] | None) -> list[str]:
          if not isinstance(domains, list):
              return []
-         seen: set[str] = set()
-         out: list[str] = []
+         seen = set()
+         out = []
          for d in domains:
              netloc = self._extract_netloc(d)
              if not netloc or netloc in seen:
@@ -361,8 +397,8 @@ class ParallelEngine(Engine):
      def _normalize_exclude_domains(self, domains: list[str] | None) -> list[str]:
          if not isinstance(domains, list):
              return []
-         seen: set[str] = set()
-         out: list[str] = []
+         seen = set()
+         out = []
          for d in domains:
              netloc = self._extract_netloc(d)
              if not netloc or netloc in seen:
@@ -382,7 +418,7 @@ class ParallelEngine(Engine):
              text = value.strip()
              return [text] if text else []
          if isinstance(value, list):
-             cleaned: list[str] = []
+             cleaned = []
              for item in value:
                  if item is None:
                      continue
@@ -429,7 +465,7 @@ class ParallelEngine(Engine):
          excerpts = {"max_chars_per_result": max_chars_per_result}
          include = self._normalize_include_domains(kwargs.get("allowed_domains"))
          exclude = self._normalize_exclude_domains(kwargs.get("excluded_domains"))
-         source_policy: dict[str, Any] | None = None
+         source_policy = None
          if include or exclude:
              source_policy = {}
              if include:
@@ -457,7 +493,7 @@ class ParallelEngine(Engine):
 
          include = self._normalize_include_domains(kwargs.get("allowed_domains"))
          exclude = self._normalize_exclude_domains(kwargs.get("excluded_domains"))
-         source_policy: dict[str, Any] | None = None
+         source_policy = None
          if include or exclude:
              source_policy = {}
              if include:
@@ -542,7 +578,7 @@ class ParallelEngine(Engine):
          source_policy: dict[str, Any] | None,
          task_spec: Any,
      ):
-         task_kwargs: dict[str, Any] = {
+         task_kwargs = {
              "input": task_input,
              "processor": processor,
          }
@@ -559,7 +595,7 @@ class ParallelEngine(Engine):
              UserMessage(f"Failed to create Parallel task: {e}", raise_with=ValueError)
 
      def _fetch_task_result(self, run_id: str, *, timeout: Any, api_timeout: int | None):
-         result_kwargs: dict[str, Any] = {}
+         result_kwargs = {}
          if api_timeout is not None:
              result_kwargs["api_timeout"] = api_timeout
          if timeout is not None:
@@ -570,36 +606,40 @@ class ParallelEngine(Engine):
              UserMessage(f"Failed to fetch Parallel task result: {e}", raise_with=ValueError)
 
      def _task_result_to_search_payload(self, task_result: Any) -> dict[str, Any]:
-         payload: dict[str, Any] = {"results": []}
-         output = getattr(task_result, "output", None)
+         payload = {"results": []}
+         output = task_result.output
          if output is None:
              return payload
 
-         basis_items = getattr(output, "basis", None) or []
+         basis_items = output.basis or []
          for idx, basis in enumerate(basis_items):
              payload["results"].extend(self._basis_to_results(basis, basis_index=idx))
 
          if not payload["results"]:
              payload["results"].append(self._task_fallback_result(output, basis_items))
 
-         payload["task_output"] = getattr(output, "content", None)
-         payload["task_output_type"] = getattr(output, "type", None)
+         payload["task_output"] = output.content
+         payload["task_output_type"] = output.type
+
+         if basis_items:
+             first_basis = basis_items[0]
+             payload["task_reasoning"] = first_basis.reasoning
+             payload["task_confidence"] = first_basis.confidence
+
          return payload
 
      def _basis_to_results(self, basis: Any, *, basis_index: int) -> list[dict[str, Any]]:
-         raw_reasoning = getattr(basis, "reasoning", "") or ""
-         reasoning = raw_reasoning if isinstance(raw_reasoning, str) else str(raw_reasoning)
-         raw_field = getattr(basis, "field", "") or ""
-         field_title = raw_field if isinstance(raw_field, str) else str(raw_field)
+         reasoning = basis.reasoning or ""
+         field_title = basis.field or ""
          if not field_title.strip():
              field_title = "Parallel Task Output"
-         citations = getattr(basis, "citations", None) or []
+         citations = basis.citations or []
          if not citations:
              if not reasoning:
                  return []
              citations = [None]
 
-         results: list[dict[str, Any]] = []
+         results = []
          # Convert field titles to lowercase slugs by swapping non-alphanumerics for hyphens.
          slug = re.sub(r"[^a-z0-9]+", "-", field_title.lower()).strip("-") or "field"
          basis_url = f"parallel://task-output/{basis_index:04d}-{slug}"
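
The slug line above is easy to sanity-check in isolation; the field title here is made up:

```python
import re

field_title = "Q4 Revenue (EUR)"  # hypothetical basis field name
slug = re.sub(r"[^a-z0-9]+", "-", field_title.lower()).strip("-") or "field"
print(f"parallel://task-output/{0:04d}-{slug}")
# parallel://task-output/0000-q4-revenue-eur
```
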
@@ -609,10 +649,9 @@ class ParallelEngine(Engine):
                  title = field_title
                  excerpts = [reasoning]
              else:
-                 url = str(getattr(citation, "url", "") or "")
-                 title = str(getattr(citation, "title", "") or field_title)
-                 raw_excerpts = getattr(citation, "excerpts", None) or []
-                 excerpts = [snippet for snippet in raw_excerpts if isinstance(snippet, str)]
+                 url = str(citation.url or "")
+                 title = str(citation.title or field_title)
+                 excerpts = citation.excerpts or []
              if not excerpts and reasoning:
                  excerpts = [reasoning]
              results.append(
@@ -625,7 +664,7 @@ class ParallelEngine(Engine):
          return results
 
      def _task_fallback_result(self, output: Any, basis_items: list[Any]) -> dict[str, Any]:
-         content = getattr(output, "content", None)
+         content = output.content
          if isinstance(content, str):
              snippet = content
          elif isinstance(content, (dict, list)):
@@ -633,9 +672,9 @@ class ParallelEngine(Engine):
          else:
              snippet = str(content or "")
          if not snippet:
-             extra_reasoning: list[str] = []
+             extra_reasoning = []
              for basis in basis_items:
-                 raw_value = getattr(basis, "reasoning", "") or ""
+                 raw_value = basis.reasoning or ""
                  if isinstance(raw_value, str):
                      extra_reasoning.append(raw_value)
                  else:
@@ -665,13 +704,13 @@ class ParallelEngine(Engine):
      def forward(self, argument):
          kwargs = argument.kwargs
          # Route based on presence of URL vs Query
-         url = getattr(argument.prop, "url", None) or kwargs.get("url")
+         url = argument.prop.url or kwargs.get("url")
          if url:
              return self._extract(str(url), kwargs)
 
-         raw_query = getattr(argument.prop, "prepared_input", None)
+         raw_query = argument.prop.prepared_input
          if raw_query is None:
-             raw_query = getattr(argument.prop, "query", None)
+             raw_query = argument.prop.query
          search_queries = self._coerce_search_queries(raw_query)
          if not search_queries:
              UserMessage(
@@ -685,11 +724,11 @@ class ParallelEngine(Engine):
 
      def prepare(self, argument):
          # For scraping: store URL directly. For search: pass through query string.
-         url = argument.kwargs.get("url") or getattr(argument.prop, "url", None)
+         url = argument.kwargs.get("url") or argument.prop.url
          if url:
              argument.prop.prepared_input = str(url)
              return
-         query = getattr(argument.prop, "query", None)
+         query = argument.prop.query
          if isinstance(query, list):
              argument.prop.prepared_input = self._coerce_search_queries(query)
              return
symai/extended/interfaces/__init__.py CHANGED
@@ -0,0 +1 @@
+
symai/extended/interfaces/local_search.py ADDED
@@ -0,0 +1,57 @@
+ from typing import TYPE_CHECKING
+
+ from ...backend.engines.index.engine_qdrant import QdrantIndexEngine
+ from ...symbol import Expression, Symbol
+
+ if TYPE_CHECKING:
+     from ...backend.engines.index.engine_qdrant import SearchResult
+
+
+ class local_search(Expression):
+     def __init__(self, index_name: str = QdrantIndexEngine._default_index_name, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.index_name = index_name
+         self.name = self.__class__.__name__
+
+     def search(self, query: Symbol, **kwargs) -> "SearchResult":
+         symbol = self._to_symbol(query)
+         options = dict(kwargs)
+
+         index_name = options.pop("collection_name", options.pop("index_name", self.index_name))
+
+         # Normalize limit/top_k/index_top_k
+         index_top_k = options.pop("index_top_k", None)
+         if index_top_k is None:
+             top_k = options.pop("top_k", None)
+             limit = options.pop("limit", None)
+             index_top_k = top_k if top_k is not None else limit
+         if index_top_k is not None:
+             options["index_top_k"] = index_top_k
+
+         # Bypass decorator/EngineRepository pipeline entirely (and thus `forward()`).
+         # We query Qdrant directly and then format results into the same SearchResult
+         # structure used by `parallel.search` (citations, inline markers, etc.).
+         engine = QdrantIndexEngine(index_name=index_name)
+         try:
+             score_threshold = options.pop("score_threshold", None)
+             raw_filter = options.pop("query_filter", options.pop("filter", None))
+             query_filter = engine._build_query_filter(raw_filter)
+
+             # Keep `with_payload` default aligned with engine behavior; let caller override.
+             with_payload = options.pop("with_payload", True)
+             with_vectors = options.pop("with_vectors", False)
+
+             points = engine._search_sync(
+                 collection_name=index_name,
+                 query_vector=symbol.embedding,
+                 limit=options.pop("index_top_k", engine.index_top_k),
+                 score_threshold=score_threshold,
+                 query_filter=query_filter,
+                 with_payload=with_payload,
+                 with_vectors=with_vectors,
+                 **options,
+             )
+             result = engine._format_search_results(points, index_name)
+         finally:
+             del engine
+         return result
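
A hedged usage sketch of the new interface; it assumes a running Qdrant server, a populated collection (here named "papers", which is made up), and an embedding engine configured so `Symbol.embedding` works:

```python
from symai.extended.interfaces.local_search import local_search

search = local_search(index_name="papers")  # "papers" is a hypothetical collection
result = search.search("What is retrieval-augmented generation?", top_k=5)

print(result)  # <source>-wrapped excerpts with inline [n] citation markers
for citation in result.get_citations():
    print(citation.id, citation.title, citation.url)
```
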
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: symbolicai
- Version: 1.2.1
+ Version: 1.4.0
  Summary: A Neurosymbolic Perspective on Large Language Models
  Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
  License: BSD 3-Clause License
@@ -136,6 +136,8 @@ Requires-Dist: symbolicai[serpapi]; extra == "all"
  Requires-Dist: symbolicai[services]; extra == "all"
  Requires-Dist: symbolicai[solver]; extra == "all"
  Requires-Dist: symbolicai[qdrant]; extra == "all"
+ Provides-Extra: dev
+ Requires-Dist: pytest-asyncio>=1.3.0; extra == "dev"
  Dynamic: license-file
 
  # **SymbolicAI: A neuro-symbolic perspective on LLMs**
@@ -1,5 +1,5 @@
  symai/TERMS_OF_SERVICE.md,sha256=HN42UXVI_wAVDHjMShzy_k7xAsbjXaATNeMKcIte_eg,91409
- symai/__init__.py,sha256=7dv4gZERByf3qd42_39-ETnt4FaKCgOcZV1I1rcnOX4,18530
+ symai/__init__.py,sha256=s7UwW7LIsUjcCHapKxUsO0MhWH-98vO3gPUhsNxOZW8,18530
  symai/chat.py,sha256=DCEbmZ96wv-eitAVt6-oF6PT3JM3cT59Iy3r2Hucd_M,14100
  symai/components.py,sha256=s10kLvwAOjSBQQohoHGtAIKs0UHHCd_HhiRvMbNtIH0,64685
  symai/constraints.py,sha256=ljjB9p0qK4DrDl_u5G_Y-Y6WAH5ZHANIqLLxRtwcORs,1980
@@ -41,7 +41,7 @@ symai/backend/engines/files/engine_io.py,sha256=4eYBz44rQYWD7VO6Pn7hVF_cOnqNuolo
  symai/backend/engines/imagecaptioning/engine_blip2.py,sha256=8lTzc8sQpuNY4AUb_ZweRKr95v-sFtTykT5ennVf6g0,2915
  symai/backend/engines/imagecaptioning/engine_llavacpp_client.py,sha256=jBsLZv0Laa4tuPyX0VQ7uwyldyO3aYIbbj73WjTbceM,6793
  symai/backend/engines/index/engine_pinecone.py,sha256=fxCew1ldUdjd9UtqnMuWFDiVz5X5BUIKZtq1iSDhj28,9132
- symai/backend/engines/index/engine_qdrant.py,sha256=GtWVbgaqJuATfGus0A0h7EgM_8hKlbw3fnorNJmbC_Q,43300
+ symai/backend/engines/index/engine_qdrant.py,sha256=U9p0kzYvHE4DjFgxnvnG_8xfEoP_W4dpaBGY5gTFMF4,50994
  symai/backend/engines/index/engine_vectordb.py,sha256=xXU8QaC2BX9O4dDjDCVYgWO4PxQMpmNlhtal6UVtV0o,8541
  symai/backend/engines/lean/engine_lean4.py,sha256=ln5nbQn5szq8nRulbREPLCPQ5bwjM_A5XAGMkfzPdT8,10102
  symai/backend/engines/neurosymbolic/__init__.py,sha256=o7HUmxcYSrIkutGYB-6_Qur3adHyrkVeWroDtqEK-YE,2279
@@ -61,7 +61,7 @@ symai/backend/engines/ocr/engine_apilayer.py,sha256=UpC3oHBdSM6wlPVqxwMkemBd-Y0R
  symai/backend/engines/output/engine_stdout.py,sha256=BWNXACl5U-WYIJnT1pZNwZsTRMzP1XzA0A7o693mmyQ,899
  symai/backend/engines/scrape/engine_requests.py,sha256=yyVFT9JrZ4S6v5U_cykef-tn5iWGl1MAdpqnDaQ70TA,13821
  symai/backend/engines/search/engine_openai.py,sha256=hAEu3vPZzLTvgmNc4BSZDTcNb4ek4xYeOf8xgti2zRs,14248
- symai/backend/engines/search/engine_parallel.py,sha256=PybgfkpJ_rA5FkVebZisfXwWIcki2AJPxqZfnWPl5To,26422
+ symai/backend/engines/search/engine_parallel.py,sha256=vhRavd_LStk6grV1aDZiHWfW9v1uDnCLX0BT8smiV84,27008
  symai/backend/engines/search/engine_perplexity.py,sha256=rXnZjMCSiIRuJcNSchE58-f9zWJmYpkKMHONF_XwGnk,4100
  symai/backend/engines/search/engine_serpapi.py,sha256=ZJJBnEDoLjkpxWt_o4vFZanwqojH8ZFBWmWNnEaIbww,3618
  symai/backend/engines/speech_to_text/engine_local_whisper.py,sha256=EOUh2GCeEhZ2Av72i_AZ4NSj9e46Pl7Ft6sIErFy6FI,8387
@@ -100,7 +100,7 @@ symai/extended/solver.py,sha256=Men8FcGlUdUHJCw0lb1rKAwLOGp5-d5Rnuf2sx5Q6PM,1173
  symai/extended/summarizer.py,sha256=x7yKOU-tXmvHZxmyKrPoy5_Dy9-Zet1oAcDK8uvQSRI,1052
  symai/extended/taypan_interpreter.py,sha256=yPIcI-NcpNpfDb3r3KiclP9XwzvFo_enoZOgK1JM3NI,4832
  symai/extended/vectordb.py,sha256=npCR9WBfV6RN3OQZuJAELpwz1sM6q1btKqrVaW5jPvs,13546
- symai/extended/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ symai/extended/interfaces/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
  symai/extended/interfaces/blip_2.py,sha256=wZYVzql6w_OJMUZc1c2BKx3LHrlapRprx-Q6p99_qxE,463
  symai/extended/interfaces/clip.py,sha256=l6vjEq3cF-wDX9cRPulyiKpDFQB8QI2609GcGtvqt7U,514
  symai/extended/interfaces/console.py,sha256=qeAnG80f95ArADjfpk57AaDA1cHUQSkaUrau2zGNSKs,637
@@ -110,6 +110,7 @@ symai/extended/interfaces/flux.py,sha256=LTY_I9UtIxnh3Nc4cBPQhQ6upB6CVZIhc1uOnFp
  symai/extended/interfaces/gpt_image.py,sha256=Jk5-9og440eZeRAhKmjdyhwP22wX58q0NcFuVhIFWZQ,718
  symai/extended/interfaces/input.py,sha256=CFMLf2j_a-rZ1ApaEwfgqZmWVS7_1yj_u6iiqtiOGPs,456
  symai/extended/interfaces/llava.py,sha256=yCItfGYSk35RazhEfHR4R324h-R6W5DjZYeJBonDkRU,433
+ symai/extended/interfaces/local_search.py,sha256=AHHRsYCUm4VttGSl_HAk5kpH34e0x_uzvgy1OXSubSs,2408
  symai/extended/interfaces/naive_scrape.py,sha256=KPjTSBXSCr5zwHwIPgF-VwLSTD2OjVcL4xALNX4l9-4,682
  symai/extended/interfaces/naive_vectordb.py,sha256=fm7DBMYYnSx7Ma7eNnCmuOVyQwNGnkiDR31oV-qNrJA,1348
  symai/extended/interfaces/ocr.py,sha256=MMxgp8ZKoM44doJPZzzrBVh2VxChs6faFu2uFYnbzfU,563
@@ -162,9 +163,9 @@ symai/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
  symai/server/huggingface_server.py,sha256=wSAVqFiKQsCu5UB2YYVpxJBhJ7GgQBBfePxNi265yP8,9039
  symai/server/llama_cpp_server.py,sha256=-WPTNB2cbnwtnpES4AtPM__MCasDKl83jr94JGS9tmI,2144
  symai/server/qdrant_server.py,sha256=l4r4rz29c7cO1dapXO0LQ4sHW4WF44keuz7j8v5azMc,9854
- symbolicai-1.2.1.dist-info/licenses/LICENSE,sha256=9vRFudlJ1ghVfra5lcCUIYQCqnZSYcBLjLHbGRsrQCs,1505
- symbolicai-1.2.1.dist-info/METADATA,sha256=B1Jb04tBfVJ14P2PtmpTr_tsvyKzK1QGCnGCk6bJVkU,23603
- symbolicai-1.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- symbolicai-1.2.1.dist-info/entry_points.txt,sha256=JV5sdydIfUZdDF6QBEQHiZHod6XNPjCjpWQrXh7gTAw,261
- symbolicai-1.2.1.dist-info/top_level.txt,sha256=bOoIDfpDIvCQtQgXcwVKJvxAKwsxpxo2IL4z92rNJjw,6
- symbolicai-1.2.1.dist-info/RECORD,,
+ symbolicai-1.4.0.dist-info/licenses/LICENSE,sha256=9vRFudlJ1ghVfra5lcCUIYQCqnZSYcBLjLHbGRsrQCs,1505
+ symbolicai-1.4.0.dist-info/METADATA,sha256=dlAY-AhPA52x_fmXU-i7h6rA-M1Mf0qJ00OeOUyplGs,23676
+ symbolicai-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ symbolicai-1.4.0.dist-info/entry_points.txt,sha256=JV5sdydIfUZdDF6QBEQHiZHod6XNPjCjpWQrXh7gTAw,261
+ symbolicai-1.4.0.dist-info/top_level.txt,sha256=bOoIDfpDIvCQtQgXcwVKJvxAKwsxpxo2IL4z92rNJjw,6
+ symbolicai-1.4.0.dist-info/RECORD,,