symbolicai-1.2.1-py3-none-any.whl → symbolicai-1.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +1 -1
- symai/backend/engines/index/engine_qdrant.py +222 -10
- symai/backend/engines/search/engine_parallel.py +92 -53
- symai/extended/interfaces/__init__.py +1 -0
- symai/extended/interfaces/local_search.py +57 -0
- {symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/METADATA +3 -1
- {symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/RECORD +11 -10
- {symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/WHEEL +0 -0
- {symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/top_level.txt +0 -0
symai/__init__.py CHANGED

symai/backend/engines/index/engine_qdrant.py CHANGED

@@ -4,8 +4,10 @@ import tempfile
 import urllib.request
 import uuid
 import warnings
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any
+from urllib.parse import urlparse

 import numpy as np

@@ -148,6 +150,108 @@ Matches:
         return f"<ul>{doc_str}</ul>"


+@dataclass
+class Citation:
+    id: int
+    title: str
+    url: str
+    start: int
+    end: int
+
+    def __hash__(self):
+        return hash((self.url,))
+
+
+class SearchResult(Result):
+    def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
+        super().__init__(value, **kwargs)
+        if isinstance(value, dict) and value.get("error"):
+            UserMessage(value["error"], raise_with=ValueError)
+        results = self._coerce_results(value)
+        text, citations = self._build_text_and_citations(results)
+        self._value = text
+        self._citations = citations
+
+    def _coerce_results(self, raw: Any) -> list[dict[str, Any]]:
+        if raw is None:
+            return []
+        results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
+        if not results:
+            return []
+        return [item for item in results if isinstance(item, dict)]
+
+    def _source_identifier(self, item: dict[str, Any], url: str) -> str:
+        for key in ("source_id", "sourceId", "sourceID", "id"):
+            raw = item.get(key)
+            if raw is None:
+                continue
+            text = str(raw).strip()
+            if text:
+                return text
+        path = Path(urlparse(url).path)
+        return path.name or path.as_posix() or url
+
+    def _build_text_and_citations(self, results: list[dict[str, Any]]):
+        pieces = []
+        citations = []
+        cursor = 0
+        cid = 1
+        separator = "\n\n---\n\n"
+
+        for item in results:
+            url = str(item.get("url") or "")
+            if not url:
+                continue
+
+            title = str(item.get("title") or "")
+            if not title:
+                path = Path(urlparse(url).path)
+                title = path.name or url
+
+            excerpts = item.get("excerpts") or []
+            excerpt_parts = [ex.strip() for ex in excerpts if isinstance(ex, str) and ex.strip()]
+            if not excerpt_parts:
+                continue
+
+            combined_excerpt = "\n\n".join(excerpt_parts)
+            source_id = self._source_identifier(item, url)
+            block_body = combined_excerpt if not source_id else f"{source_id}\n\n{combined_excerpt}"
+
+            if pieces:
+                pieces.append(separator)
+                cursor += len(separator)
+
+            opening_tag = "<source>\n"
+            pieces.append(opening_tag)
+            cursor += len(opening_tag)
+
+            pieces.append(block_body)
+            cursor += len(block_body)
+
+            closing_tag = "\n</source>"
+            pieces.append(closing_tag)
+            cursor += len(closing_tag)
+
+            marker = f"[{cid}]"
+            start = cursor
+            pieces.append(marker)
+            cursor += len(marker)
+
+            citations.append(Citation(id=cid, title=title or url, url=url, start=start, end=cursor))
+            cid += 1
+
+        return "".join(pieces), citations
+
+    def __str__(self) -> str:
+        return str(self._value or "")
+
+    def _repr_html_(self) -> str:
+        return f"<pre>{self._value or ''}</pre>"
+
+    def get_citations(self) -> list[Citation]:
+        return self._citations
+
+
 class QdrantIndexEngine(Engine):
     _default_url = "http://localhost:6333"
     _default_api_key = SYMAI_CONFIG.get("INDEXING_ENGINE_API_KEY", None)
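The hunk above adds a Citation dataclass and a SearchResult wrapper that flattens Qdrant hits into <source> blocks with inline [n] markers. A minimal sketch of how that wrapper behaves, assuming the module is importable from this wheel (importing it needs the qdrant extra); the example payload is illustrative, not part of the diff:

```python
# Illustrative only: exercises the SearchResult/Citation classes added above.
from symai.backend.engines.index.engine_qdrant import SearchResult

payload = {
    "results": [
        {
            "url": "https://example.com/docs/guide.pdf",   # assumed example URL
            "title": "Guide",
            "excerpts": ["First relevant chunk.", "Second relevant chunk."],
            "source_id": "chunk-42",
        }
    ]
}

result = SearchResult(payload)
print(result)                      # one <source> block per hit, followed by its [1] marker
for c in result.get_citations():   # Citation(id, title, url, start, end)
    print(c.id, c.title, c.url, c.start, c.end)
```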
@@ -421,15 +525,18 @@ class QdrantIndexEngine(Engine):
         kwargs["index_get"] = True
         self._configure_collection(**kwargs)

+        treat_as_search_engine = False
         if operation == "search":
             # Ensure collection exists - fail fast if it doesn't
             self._ensure_collection_exists(collection_name)
-
+            search_kwargs = dict(kwargs)
+            index_top_k = search_kwargs.pop("index_top_k", self.index_top_k)
             # Optional search parameters
-            score_threshold =
+            score_threshold = search_kwargs.pop("score_threshold", None)
             # Accept both `query_filter` and `filter` for convenience
-            raw_filter =
+            raw_filter = search_kwargs.pop("query_filter", search_kwargs.pop("filter", None))
             query_filter = self._build_query_filter(raw_filter)
+            treat_as_search_engine = bool(search_kwargs.pop("treat_as_search_engine", False))

             # Use shared search helper that already handles retries and normalization
             rsp = self._search_sync(
@@ -438,6 +545,7 @@ class QdrantIndexEngine(Engine):
                 limit=index_top_k,
                 score_threshold=score_threshold,
                 query_filter=query_filter,
+                **search_kwargs,
             )
         elif operation == "add":
             # Create collection if it doesn't exist (only for write operations)
@@ -462,7 +570,10 @@ class QdrantIndexEngine(Engine):

         metadata = {}

-
+        if operation == "search" and treat_as_search_engine:
+            rsp = self._format_search_results(rsp, collection_name)
+        else:
+            rsp = QdrantResult(rsp, query, embedding)
         return [rsp], metadata

     def prepare(self, argument):
@@ -513,7 +624,33 @@ class QdrantIndexEngine(Engine):
             jitter=self.jitter,
         )
         def _func():
+            qdrant_kwargs = dict(kwargs)
             query_vector_normalized = self._normalize_vector(query_vector)
+            with_payload = qdrant_kwargs.pop("with_payload", True)
+            with_vectors = qdrant_kwargs.pop("with_vectors", self.index_values)
+            # qdrant-client `query_points` is strict about extra kwargs and will assert if any
+            # unknown arguments are provided. Because our engine `forward()` passes decorator
+            # kwargs through the stack, we must drop engine-internal fields here.
+            #
+            # Keep only kwargs that `qdrant_client.QdrantClient.query_points` accepts (besides
+            # those we pass explicitly).
+            if "filter" in qdrant_kwargs and "query_filter" not in qdrant_kwargs:
+                # Convenience alias supported by our public API
+                qdrant_kwargs["query_filter"] = qdrant_kwargs.pop("filter")
+
+            allowed_qdrant_kwargs = {
+                "using",
+                "prefetch",
+                "query_filter",
+                "search_params",
+                "offset",
+                "score_threshold",
+                "lookup_from",
+                "consistency",
+                "shard_key_selector",
+                "timeout",
+            }
+            qdrant_kwargs = {k: v for k, v in qdrant_kwargs.items() if k in allowed_qdrant_kwargs}
             # For single vector collections, pass vector directly to query parameter
             # For named vector collections, use Query(near_vector=NamedVector(name="vector_name", vector=...))
             # query_points API uses query_filter (not filter) for filtering
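The comment block in this hunk explains why extra kwargs must be stripped before query_points. A small standalone sketch of that filtering step, with names copied from the hunk and illustrative values:

```python
# Illustrative only: the same allow-list filtering performed inside _func() above.
allowed_qdrant_kwargs = {
    "using", "prefetch", "query_filter", "search_params", "offset",
    "score_threshold", "lookup_from", "consistency", "shard_key_selector", "timeout",
}

incoming = {"score_threshold": 0.4, "filter": {"must": []}, "index_top_k": 8}  # assumed caller kwargs
if "filter" in incoming and "query_filter" not in incoming:
    incoming["query_filter"] = incoming.pop("filter")   # `filter` alias becomes `query_filter`
passed_on = {k: v for k, v in incoming.items() if k in allowed_qdrant_kwargs}
print(passed_on)  # {'score_threshold': 0.4, 'query_filter': {'must': []}}; engine-internal keys are dropped
```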
@@ -521,9 +658,9 @@ class QdrantIndexEngine(Engine):
                 collection_name=collection_name,
                 query=query_vector_normalized,
                 limit=top_k,
-                with_payload=
-                with_vectors=
-                **
+                with_payload=with_payload,
+                with_vectors=with_vectors,
+                **qdrant_kwargs,
             )
             # query_points returns QueryResponse with .points attribute, extract it
             return response.points
@@ -860,6 +997,82 @@ class QdrantIndexEngine(Engine):
         # Use _query which handles retry logic and vector normalization
         return self._query(collection_name, query_vector, limit, **search_kwargs)

+    def _resolve_payload_url(
+        self, payload: dict[str, Any], collection_name: str, point_id: Any
+    ) -> str:
+        source = (
+            payload.get("source")
+            or payload.get("url")
+            or payload.get("file_path")
+            or payload.get("path")
+        )
+        if isinstance(source, str) and source:
+            if source.startswith(("http://", "https://", "file://")):
+                return source
+
+            source_path = Path(source).expanduser()
+            try:
+                resolved = source_path.resolve()
+                if resolved.exists() or source_path.is_absolute():
+                    return resolved.as_uri()
+            except Exception:
+                return str(source_path)
+            return str(source_path)
+
+        return f"qdrant://{collection_name}/{point_id}"
+
+    def _resolve_payload_title(self, payload: dict[str, Any], url: str, page: Any) -> str:
+        raw_title = payload.get("title")
+        if isinstance(raw_title, str) and raw_title.strip():
+            base = raw_title.strip()
+        else:
+            parsed = urlparse(url)
+            path_part = parsed.path or url
+            base = Path(path_part).stem or url
+
+        try:
+            page_int = int(page) if page is not None else None
+        except (TypeError, ValueError):
+            page_int = None
+
+        if Path(urlparse(url).path).suffix.lower() == ".pdf" and page_int is not None:
+            base = f"{base}#p{page_int}"
+
+        return base
+
+    def _format_search_results(self, points: list[ScoredPoint] | None, collection_name: str):
+        results: list[dict[str, Any]] = []
+
+        for point in points or []:
+            payload = getattr(point, "payload", {}) or {}
+            text = payload.get("text") or payload.get("content")
+            if isinstance(text, list):
+                text = " ".join([t for t in text if isinstance(t, str)])
+            if not isinstance(text, str):
+                continue
+            excerpt = text.strip()
+            if not excerpt:
+                continue
+
+            page = payload.get("page") or payload.get("page_number") or payload.get("pageIndex")
+            url = self._resolve_payload_url(payload, collection_name, getattr(point, "id", ""))
+            title = self._resolve_payload_title(payload, url, page)
+
+            results.append(
+                {
+                    "url": url,
+                    "title": title,
+                    "excerpts": [excerpt],
+                    "source_id": payload.get("source_id")
+                    or payload.get("sourceId")
+                    or payload.get("chunk_id")
+                    or payload.get("chunkId")
+                    or getattr(point, "id", None),
+                }
+            )
+
+        return SearchResult({"results": results})
+
     async def search(
         self,
         collection_name: str,
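The helpers above map a Qdrant point payload onto the generic result schema that SearchResult consumes. A small illustration of that mapping, with an assumed payload (not taken from the diff):

```python
# Illustrative only: the payload-to-result mapping performed by the helpers above.
payload = {
    "text": "Retrieved chunk text ...",
    "source": "/home/user/docs/report.pdf",   # assumed local file path
    "page": 3,
}
# _resolve_payload_url   -> "file:///home/user/docs/report.pdf" if the path resolves,
#                           otherwise the plain path, or "qdrant://<collection>/<point-id>"
#                           when no source/url/file_path/path key is present.
# _resolve_payload_title -> "report#p3" (PDF stem plus page marker).
# _format_search_results -> {"url": ..., "title": ..., "excerpts": ["Retrieved chunk text ..."],
#                            "source_id": <point or chunk id>}, wrapped in a SearchResult.
```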
@@ -923,7 +1136,7 @@ class QdrantIndexEngine(Engine):
         if tmp_path.exists():
             tmp_path.unlink()

-    async def chunk_and_upsert(
+    async def chunk_and_upsert(
         self,
         collection_name: str,
         text: str | Symbol | None = None,
@@ -1001,8 +1214,7 @@ class QdrantIndexEngine(Engine):
         # Add source to metadata if not already present
         if metadata is None:
             metadata = {}
-
-        metadata["source"] = doc_path.name
+        metadata["source"] = str(doc_path.resolve())

         # Handle document_url: download and read file using FileReader
         elif document_url is not None:
symai/backend/engines/search/engine_parallel.py CHANGED

@@ -74,7 +74,8 @@ class SearchResult(Result):
         self._citations: list[Citation] = []
         try:
             results = self._coerce_results(value)
-
+            task_meta = self._extract_task_metadata(value)
+            text, citations = self._build_text_and_citations(results, task_meta=task_meta)
             self._value = text
             self._citations = citations
         except Exception as e:
@@ -87,13 +88,26 @@ class SearchResult(Result):
         results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
         if not results:
             return []
-        coerced
+        coerced = []
         for item in results:
             if item is None:
                 continue
             coerced.append(_item_to_mapping(item))
         return coerced

+    def _extract_task_metadata(self, raw: Any) -> dict[str, Any] | None:
+        if not isinstance(raw, dict):
+            return None
+        task_output = raw.get("task_output")
+        if task_output is None:
+            return None
+        output_value = task_output.get("output") if isinstance(task_output, dict) else None
+        return {
+            "reasoning": raw.get("task_reasoning"),
+            "answer": output_value,
+            "confidence": raw.get("task_confidence"),
+        }
+
     def _normalize_url(self, url: str) -> str:
         parts = urlsplit(url)
         scheme = parts.scheme.lower() if parts.scheme else "https"
@@ -139,11 +153,40 @@ class SearchResult(Result):
         cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
         return cleaned.strip()

-    def _build_text_and_citations(
-
-
+    def _build_text_and_citations(
+        self, results: list[dict[str, Any]], *, task_meta: dict[str, Any] | None = None
+    ):
+        pieces = []
+        citations = []
         cursor = 0
-
+
+        if task_meta:
+            reasoning = task_meta.get("reasoning")
+            answer = task_meta.get("answer")
+            confidence = task_meta.get("confidence")
+
+            if reasoning:
+                block = f"<reasoning>\n{reasoning}\n</reasoning>"
+                pieces.append(block)
+                cursor += len(block)
+
+            if answer:
+                if pieces:
+                    pieces.append("\n\n")
+                    cursor += 2
+                block = f"<answer>\n{answer}\n</answer>"
+                pieces.append(block)
+                cursor += len(block)
+
+            if confidence:
+                if pieces:
+                    pieces.append("\n\n")
+                    cursor += 2
+                block = f"<answer_confidence>\n{confidence}\n</answer_confidence>"
+                pieces.append(block)
+                cursor += len(block)
+
+        seen_urls = set()
         cid = 1
         separator = "\n\n---\n\n"

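For orientation, the keys that _extract_task_metadata() reads, and that _build_text_and_citations() turns into leading <reasoning>/<answer>/<answer_confidence> blocks, look roughly like this (values are illustrative, not from the diff):

```python
# Illustrative only: the payload shape the updated SearchResult understands.
payload = {
    "results": [
        {"url": "https://example.com/post", "title": "Example", "excerpts": ["Cited excerpt ..."]},
    ],
    "task_output": {"output": "Synthesized answer text."},        # rendered as the <answer> block
    "task_reasoning": "Why the answer follows from the sources.",  # rendered as the <reasoning> block
    "task_confidence": "high",                                      # rendered as <answer_confidence>
}
# SearchResult(payload) would emit the three metadata blocks first, then the
# citation-marked <source> excerpts separated by "---".
```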
@@ -158,13 +201,8 @@ class SearchResult(Result):

             title = str(item.get("title") or "") or urlsplit(normalized_url).netloc
             excerpts = item.get("excerpts") or []
-            excerpt_parts
-            for
-            if not isinstance(ex, str):
-                continue
-            sanitized = self._sanitize_excerpt(ex)
-            if sanitized:
-                excerpt_parts.append(sanitized)
+            excerpt_parts = [self._sanitize_excerpt(ex) for ex in excerpts]
+            excerpt_parts = [p for p in excerpt_parts if p]
             if not excerpt_parts:
                 continue

@@ -255,16 +293,14 @@ class ExtractResult(Result):
         super().__init__(value, **kwargs)
         try:
             results = self._coerce_results(value)
-            content_parts
+            content_parts = []
             for r in results:
-                excerpts = r.get("excerpts") or []
                 full = r.get("full_content")
-                if
+                if full is not None:
                     content_parts.append(full)
-
-
-
-                content_parts.extend([s for s in excerpts if isinstance(s, str)])
+                else:
+                    excerpts = r.get("excerpts") or []
+                    content_parts.extend(excerpts)
             self._value = "\n\n".join(content_parts)
         except Exception as e:
             self._value = None
@@ -276,7 +312,7 @@ class ExtractResult(Result):
         results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
         if not results:
             return []
-        coerced
+        coerced = []
         for item in results:
             if item is None:
                 continue
@@ -344,8 +380,8 @@ class ParallelEngine(Engine):
     def _normalize_include_domains(self, domains: list[str] | None) -> list[str]:
         if not isinstance(domains, list):
             return []
-        seen
-        out
+        seen = set()
+        out = []
         for d in domains:
             netloc = self._extract_netloc(d)
             if not netloc or netloc in seen:
@@ -361,8 +397,8 @@ class ParallelEngine(Engine):
     def _normalize_exclude_domains(self, domains: list[str] | None) -> list[str]:
         if not isinstance(domains, list):
             return []
-        seen
-        out
+        seen = set()
+        out = []
         for d in domains:
             netloc = self._extract_netloc(d)
             if not netloc or netloc in seen:
@@ -382,7 +418,7 @@ class ParallelEngine(Engine):
             text = value.strip()
             return [text] if text else []
         if isinstance(value, list):
-            cleaned
+            cleaned = []
             for item in value:
                 if item is None:
                     continue
@@ -429,7 +465,7 @@ class ParallelEngine(Engine):
         excerpts = {"max_chars_per_result": max_chars_per_result}
         include = self._normalize_include_domains(kwargs.get("allowed_domains"))
         exclude = self._normalize_exclude_domains(kwargs.get("excluded_domains"))
-        source_policy
+        source_policy = None
         if include or exclude:
             source_policy = {}
             if include:
@@ -457,7 +493,7 @@ class ParallelEngine(Engine):

         include = self._normalize_include_domains(kwargs.get("allowed_domains"))
         exclude = self._normalize_exclude_domains(kwargs.get("excluded_domains"))
-        source_policy
+        source_policy = None
         if include or exclude:
             source_policy = {}
             if include:
@@ -542,7 +578,7 @@ class ParallelEngine(Engine):
         source_policy: dict[str, Any] | None,
         task_spec: Any,
     ):
-        task_kwargs
+        task_kwargs = {
             "input": task_input,
             "processor": processor,
         }
@@ -559,7 +595,7 @@ class ParallelEngine(Engine):
             UserMessage(f"Failed to create Parallel task: {e}", raise_with=ValueError)

     def _fetch_task_result(self, run_id: str, *, timeout: Any, api_timeout: int | None):
-        result_kwargs
+        result_kwargs = {}
         if api_timeout is not None:
             result_kwargs["api_timeout"] = api_timeout
         if timeout is not None:
@@ -570,36 +606,40 @@ class ParallelEngine(Engine):
             UserMessage(f"Failed to fetch Parallel task result: {e}", raise_with=ValueError)

     def _task_result_to_search_payload(self, task_result: Any) -> dict[str, Any]:
-        payload
-        output =
+        payload = {"results": []}
+        output = task_result.output
         if output is None:
             return payload

-        basis_items =
+        basis_items = output.basis or []
         for idx, basis in enumerate(basis_items):
             payload["results"].extend(self._basis_to_results(basis, basis_index=idx))

         if not payload["results"]:
             payload["results"].append(self._task_fallback_result(output, basis_items))

-        payload["task_output"] =
-        payload["task_output_type"] =
+        payload["task_output"] = output.content
+        payload["task_output_type"] = output.type
+
+        if basis_items:
+            first_basis = basis_items[0]
+            payload["task_reasoning"] = first_basis.reasoning
+            payload["task_confidence"] = first_basis.confidence
+
         return payload

     def _basis_to_results(self, basis: Any, *, basis_index: int) -> list[dict[str, Any]]:
-
-
-        raw_field = getattr(basis, "field", "") or ""
-        field_title = raw_field if isinstance(raw_field, str) else str(raw_field)
+        reasoning = basis.reasoning or ""
+        field_title = basis.field or ""
         if not field_title.strip():
             field_title = "Parallel Task Output"
-        citations =
+        citations = basis.citations or []
         if not citations:
             if not reasoning:
                 return []
             citations = [None]

-        results
+        results = []
         # Convert field titles to lowercase slugs by swapping non-alphanumerics for hyphens.
         slug = re.sub(r"[^a-z0-9]+", "-", field_title.lower()).strip("-") or "field"
         basis_url = f"parallel://task-output/{basis_index:04d}-{slug}"
@@ -609,10 +649,9 @@ class ParallelEngine(Engine):
                 title = field_title
                 excerpts = [reasoning]
             else:
-                url = str(
-                title = str(
-
-                excerpts = [snippet for snippet in raw_excerpts if isinstance(snippet, str)]
+                url = str(citation.url or "")
+                title = str(citation.title or field_title)
+                excerpts = citation.excerpts or []
             if not excerpts and reasoning:
                 excerpts = [reasoning]
             results.append(
@@ -625,7 +664,7 @@ class ParallelEngine(Engine):
         return results

     def _task_fallback_result(self, output: Any, basis_items: list[Any]) -> dict[str, Any]:
-        content =
+        content = output.content
         if isinstance(content, str):
             snippet = content
         elif isinstance(content, (dict, list)):
@@ -633,9 +672,9 @@ class ParallelEngine(Engine):
         else:
             snippet = str(content or "")
         if not snippet:
-            extra_reasoning
+            extra_reasoning = []
             for basis in basis_items:
-                raw_value =
+                raw_value = basis.reasoning or ""
                 if isinstance(raw_value, str):
                     extra_reasoning.append(raw_value)
                 else:
@@ -665,13 +704,13 @@ class ParallelEngine(Engine):
     def forward(self, argument):
         kwargs = argument.kwargs
         # Route based on presence of URL vs Query
-        url =
+        url = argument.prop.url or kwargs.get("url")
        if url:
             return self._extract(str(url), kwargs)

-        raw_query =
+        raw_query = argument.prop.prepared_input
         if raw_query is None:
-            raw_query =
+            raw_query = argument.prop.query
         search_queries = self._coerce_search_queries(raw_query)
         if not search_queries:
             UserMessage(
@@ -685,11 +724,11 @@ class ParallelEngine(Engine):

     def prepare(self, argument):
         # For scraping: store URL directly. For search: pass through query string.
-        url = argument.kwargs.get("url") or
+        url = argument.kwargs.get("url") or argument.prop.url
         if url:
             argument.prop.prepared_input = str(url)
             return
-        query =
+        query = argument.prop.query
         if isinstance(query, list):
             argument.prop.prepared_input = self._coerce_search_queries(query)
             return
symai/extended/interfaces/__init__.py CHANGED

@@ -0,0 +1 @@
+
symai/extended/interfaces/local_search.py ADDED

@@ -0,0 +1,57 @@
+from typing import TYPE_CHECKING
+
+from ...backend.engines.index.engine_qdrant import QdrantIndexEngine
+from ...symbol import Expression, Symbol
+
+if TYPE_CHECKING:
+    from ...backend.engines.index.engine_qdrant import SearchResult
+
+
+class local_search(Expression):
+    def __init__(self, index_name: str = QdrantIndexEngine._default_index_name, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.index_name = index_name
+        self.name = self.__class__.__name__
+
+    def search(self, query: Symbol, **kwargs) -> "SearchResult":
+        symbol = self._to_symbol(query)
+        options = dict(kwargs)
+
+        index_name = options.pop("collection_name", options.pop("index_name", self.index_name))
+
+        # Normalize limit/top_k/index_top_k
+        index_top_k = options.pop("index_top_k", None)
+        if index_top_k is None:
+            top_k = options.pop("top_k", None)
+            limit = options.pop("limit", None)
+            index_top_k = top_k if top_k is not None else limit
+        if index_top_k is not None:
+            options["index_top_k"] = index_top_k
+
+        # Bypass decorator/EngineRepository pipeline entirely (and thus `forward()`).
+        # We query Qdrant directly and then format results into the same SearchResult
+        # structure used by `parallel.search` (citations, inline markers, etc.).
+        engine = QdrantIndexEngine(index_name=index_name)
+        try:
+            score_threshold = options.pop("score_threshold", None)
+            raw_filter = options.pop("query_filter", options.pop("filter", None))
+            query_filter = engine._build_query_filter(raw_filter)
+
+            # Keep `with_payload` default aligned with engine behavior; let caller override.
+            with_payload = options.pop("with_payload", True)
+            with_vectors = options.pop("with_vectors", False)
+
+            points = engine._search_sync(
+                collection_name=index_name,
+                query_vector=symbol.embedding,
+                limit=options.pop("index_top_k", engine.index_top_k),
+                score_threshold=score_threshold,
+                query_filter=query_filter,
+                with_payload=with_payload,
+                with_vectors=with_vectors,
+                **options,
+            )
+            result = engine._format_search_results(points, index_name)
+        finally:
+            del engine
+        return result
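A sketch of how this new interface might be called, assuming a running Qdrant instance, a configured embedding engine, and an existing collection; the collection name "papers" and the query text are illustrative:

```python
# Illustrative only: querying a local Qdrant collection via the new interface.
from symai import Symbol
from symai.extended.interfaces.local_search import local_search

searcher = local_search(index_name="papers")              # assumed collection name
res = searcher.search(Symbol("How is retry handled?"), top_k=5)

print(res)                         # excerpts wrapped in <source> blocks with [n] markers
for citation in res.get_citations():
    print(citation.id, citation.title, citation.url)
```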
{symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: symbolicai
-Version: 1.2.1
+Version: 1.4.0
 Summary: A Neurosymbolic Perspective on Large Language Models
 Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
 License: BSD 3-Clause License
@@ -136,6 +136,8 @@ Requires-Dist: symbolicai[serpapi]; extra == "all"
 Requires-Dist: symbolicai[services]; extra == "all"
 Requires-Dist: symbolicai[solver]; extra == "all"
 Requires-Dist: symbolicai[qdrant]; extra == "all"
+Provides-Extra: dev
+Requires-Dist: pytest-asyncio>=1.3.0; extra == "dev"
 Dynamic: license-file

 # **SymbolicAI: A neuro-symbolic perspective on LLMs**
{symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/RECORD CHANGED

@@ -1,5 +1,5 @@
 symai/TERMS_OF_SERVICE.md,sha256=HN42UXVI_wAVDHjMShzy_k7xAsbjXaATNeMKcIte_eg,91409
-symai/__init__.py,sha256=
+symai/__init__.py,sha256=s7UwW7LIsUjcCHapKxUsO0MhWH-98vO3gPUhsNxOZW8,18530
 symai/chat.py,sha256=DCEbmZ96wv-eitAVt6-oF6PT3JM3cT59Iy3r2Hucd_M,14100
 symai/components.py,sha256=s10kLvwAOjSBQQohoHGtAIKs0UHHCd_HhiRvMbNtIH0,64685
 symai/constraints.py,sha256=ljjB9p0qK4DrDl_u5G_Y-Y6WAH5ZHANIqLLxRtwcORs,1980
@@ -41,7 +41,7 @@ symai/backend/engines/files/engine_io.py,sha256=4eYBz44rQYWD7VO6Pn7hVF_cOnqNuolo
 symai/backend/engines/imagecaptioning/engine_blip2.py,sha256=8lTzc8sQpuNY4AUb_ZweRKr95v-sFtTykT5ennVf6g0,2915
 symai/backend/engines/imagecaptioning/engine_llavacpp_client.py,sha256=jBsLZv0Laa4tuPyX0VQ7uwyldyO3aYIbbj73WjTbceM,6793
 symai/backend/engines/index/engine_pinecone.py,sha256=fxCew1ldUdjd9UtqnMuWFDiVz5X5BUIKZtq1iSDhj28,9132
-symai/backend/engines/index/engine_qdrant.py,sha256=
+symai/backend/engines/index/engine_qdrant.py,sha256=U9p0kzYvHE4DjFgxnvnG_8xfEoP_W4dpaBGY5gTFMF4,50994
 symai/backend/engines/index/engine_vectordb.py,sha256=xXU8QaC2BX9O4dDjDCVYgWO4PxQMpmNlhtal6UVtV0o,8541
 symai/backend/engines/lean/engine_lean4.py,sha256=ln5nbQn5szq8nRulbREPLCPQ5bwjM_A5XAGMkfzPdT8,10102
 symai/backend/engines/neurosymbolic/__init__.py,sha256=o7HUmxcYSrIkutGYB-6_Qur3adHyrkVeWroDtqEK-YE,2279
@@ -61,7 +61,7 @@ symai/backend/engines/ocr/engine_apilayer.py,sha256=UpC3oHBdSM6wlPVqxwMkemBd-Y0R
 symai/backend/engines/output/engine_stdout.py,sha256=BWNXACl5U-WYIJnT1pZNwZsTRMzP1XzA0A7o693mmyQ,899
 symai/backend/engines/scrape/engine_requests.py,sha256=yyVFT9JrZ4S6v5U_cykef-tn5iWGl1MAdpqnDaQ70TA,13821
 symai/backend/engines/search/engine_openai.py,sha256=hAEu3vPZzLTvgmNc4BSZDTcNb4ek4xYeOf8xgti2zRs,14248
-symai/backend/engines/search/engine_parallel.py,sha256=
+symai/backend/engines/search/engine_parallel.py,sha256=vhRavd_LStk6grV1aDZiHWfW9v1uDnCLX0BT8smiV84,27008
 symai/backend/engines/search/engine_perplexity.py,sha256=rXnZjMCSiIRuJcNSchE58-f9zWJmYpkKMHONF_XwGnk,4100
 symai/backend/engines/search/engine_serpapi.py,sha256=ZJJBnEDoLjkpxWt_o4vFZanwqojH8ZFBWmWNnEaIbww,3618
 symai/backend/engines/speech_to_text/engine_local_whisper.py,sha256=EOUh2GCeEhZ2Av72i_AZ4NSj9e46Pl7Ft6sIErFy6FI,8387
@@ -100,7 +100,7 @@ symai/extended/solver.py,sha256=Men8FcGlUdUHJCw0lb1rKAwLOGp5-d5Rnuf2sx5Q6PM,1173
 symai/extended/summarizer.py,sha256=x7yKOU-tXmvHZxmyKrPoy5_Dy9-Zet1oAcDK8uvQSRI,1052
 symai/extended/taypan_interpreter.py,sha256=yPIcI-NcpNpfDb3r3KiclP9XwzvFo_enoZOgK1JM3NI,4832
 symai/extended/vectordb.py,sha256=npCR9WBfV6RN3OQZuJAELpwz1sM6q1btKqrVaW5jPvs,13546
-symai/extended/interfaces/__init__.py,sha256=
+symai/extended/interfaces/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 symai/extended/interfaces/blip_2.py,sha256=wZYVzql6w_OJMUZc1c2BKx3LHrlapRprx-Q6p99_qxE,463
 symai/extended/interfaces/clip.py,sha256=l6vjEq3cF-wDX9cRPulyiKpDFQB8QI2609GcGtvqt7U,514
 symai/extended/interfaces/console.py,sha256=qeAnG80f95ArADjfpk57AaDA1cHUQSkaUrau2zGNSKs,637
@@ -110,6 +110,7 @@ symai/extended/interfaces/flux.py,sha256=LTY_I9UtIxnh3Nc4cBPQhQ6upB6CVZIhc1uOnFp
 symai/extended/interfaces/gpt_image.py,sha256=Jk5-9og440eZeRAhKmjdyhwP22wX58q0NcFuVhIFWZQ,718
 symai/extended/interfaces/input.py,sha256=CFMLf2j_a-rZ1ApaEwfgqZmWVS7_1yj_u6iiqtiOGPs,456
 symai/extended/interfaces/llava.py,sha256=yCItfGYSk35RazhEfHR4R324h-R6W5DjZYeJBonDkRU,433
+symai/extended/interfaces/local_search.py,sha256=AHHRsYCUm4VttGSl_HAk5kpH34e0x_uzvgy1OXSubSs,2408
 symai/extended/interfaces/naive_scrape.py,sha256=KPjTSBXSCr5zwHwIPgF-VwLSTD2OjVcL4xALNX4l9-4,682
 symai/extended/interfaces/naive_vectordb.py,sha256=fm7DBMYYnSx7Ma7eNnCmuOVyQwNGnkiDR31oV-qNrJA,1348
 symai/extended/interfaces/ocr.py,sha256=MMxgp8ZKoM44doJPZzzrBVh2VxChs6faFu2uFYnbzfU,563
@@ -162,9 +163,9 @@ symai/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 symai/server/huggingface_server.py,sha256=wSAVqFiKQsCu5UB2YYVpxJBhJ7GgQBBfePxNi265yP8,9039
 symai/server/llama_cpp_server.py,sha256=-WPTNB2cbnwtnpES4AtPM__MCasDKl83jr94JGS9tmI,2144
 symai/server/qdrant_server.py,sha256=l4r4rz29c7cO1dapXO0LQ4sHW4WF44keuz7j8v5azMc,9854
-symbolicai-1.
-symbolicai-1.
-symbolicai-1.
-symbolicai-1.
-symbolicai-1.
-symbolicai-1.
+symbolicai-1.4.0.dist-info/licenses/LICENSE,sha256=9vRFudlJ1ghVfra5lcCUIYQCqnZSYcBLjLHbGRsrQCs,1505
+symbolicai-1.4.0.dist-info/METADATA,sha256=dlAY-AhPA52x_fmXU-i7h6rA-M1Mf0qJ00OeOUyplGs,23676
+symbolicai-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+symbolicai-1.4.0.dist-info/entry_points.txt,sha256=JV5sdydIfUZdDF6QBEQHiZHod6XNPjCjpWQrXh7gTAw,261
+symbolicai-1.4.0.dist-info/top_level.txt,sha256=bOoIDfpDIvCQtQgXcwVKJvxAKwsxpxo2IL4z92rNJjw,6
+symbolicai-1.4.0.dist-info/RECORD,,
{symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/WHEEL: File without changes
{symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/entry_points.txt: File without changes
{symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/licenses/LICENSE: File without changes
{symbolicai-1.2.1.dist-info → symbolicai-1.4.0.dist-info}/top_level.txt: File without changes