symbolicai 0.17.5__py3-none-any.whl → 0.17.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +1 -1
- symai/backend/engines/search/engine_openai.py +117 -33
- {symbolicai-0.17.5.dist-info → symbolicai-0.17.6.dist-info}/METADATA +1 -1
- {symbolicai-0.17.5.dist-info → symbolicai-0.17.6.dist-info}/RECORD +7 -7
- {symbolicai-0.17.5.dist-info → symbolicai-0.17.6.dist-info}/WHEEL +0 -0
- {symbolicai-0.17.5.dist-info → symbolicai-0.17.6.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.17.5.dist-info → symbolicai-0.17.6.dist-info}/top_level.txt +0 -0
symai/backend/engines/search/engine_openai.py
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
|
+
import hashlib
|
|
1
2
|
import json
|
|
2
3
|
import logging
|
|
3
|
-
import
|
|
4
|
+
import re
|
|
4
5
|
from copy import deepcopy
|
|
5
6
|
from dataclasses import dataclass
|
|
7
|
+
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
|
|
8
|
+
|
|
9
|
+
from openai import OpenAI
|
|
6
10
|
|
|
7
11
|
from ....symbol import Result
|
|
8
12
|
from ....utils import CustomUserWarning
|
|
9
13
|
from ...base import Engine
|
|
10
|
-
from ...settings import SYMAI_CONFIG
|
|
11
14
|
from ...mixin import OPENAI_CHAT_MODELS, OPENAI_REASONING_MODELS
|
|
15
|
+
from ...settings import SYMAI_CONFIG
|
|
12
16
|
|
|
13
17
|
logging.getLogger("requests").setLevel(logging.ERROR)
|
|
14
18
|
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
|
@@ -16,6 +20,10 @@ logging.getLogger("httpx").setLevel(logging.ERROR)
|
|
|
16
20
|
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
|
17
21
|
|
|
18
22
|
|
|
23
|
+
# Query-string keys that are dropped when URLs are normalized.
# Only "utm_source" has been observed in responses so far.
TRACKING_KEYS = {"utm_source"}
|
|
26
|
+
|
|
19
27
|
@dataclass
|
|
20
28
|
class Citation:
|
|
21
29
|
id: str
|
|
@@ -34,32 +42,111 @@ class SearchResult(Result):
|
|
|
34
42
|
if value.get('error'):
|
|
35
43
|
CustomUserWarning(value['error'], raise_with=ValueError)
|
|
36
44
|
try:
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
url=annotation.get('url', ''),
|
|
49
|
-
)
|
|
50
|
-
if citation not in citations:
|
|
51
|
-
citations.append(citation)
|
|
52
|
-
self._value = output['content'][0]['text']
|
|
53
|
-
delta = 0
|
|
54
|
-
for citation in citations:
|
|
55
|
-
self._value = self._value[:citation.start - delta] + citation.id + self._value[citation.end - delta:]
|
|
56
|
-
delta += (citation.end - citation.start) - len(citation.id)
|
|
57
|
-
self._citations = citations
|
|
45
|
+
text, annotations = self._extract_text_and_annotations(value)
|
|
46
|
+
if text is None:
|
|
47
|
+
self._value = None
|
|
48
|
+
self._citations = []
|
|
49
|
+
return
|
|
50
|
+
replaced_text, ordered = self._replace_links_with_citations(text, annotations, id_mode="sequential")
|
|
51
|
+
self._value = replaced_text
|
|
52
|
+
self._citations = [
|
|
53
|
+
Citation(id=cid, title=title, url=url, start=0, end=0)
|
|
54
|
+
for cid, title, url in ordered
|
|
55
|
+
]
|
|
58
56
|
|
|
59
57
|
except Exception as e:
|
|
60
58
|
self._value = None
|
|
61
59
|
CustomUserWarning(f"Failed to parse response: {e}", raise_with=ValueError)
|
|
62
60
|
|
|
61
|
+
def _extract_text(self, value) -> str | None:
|
|
62
|
+
text = None
|
|
63
|
+
for output in value.get('output', []):
|
|
64
|
+
if output.get('type') == 'message' and output.get('content'):
|
|
65
|
+
content0 = output['content'][0]
|
|
66
|
+
if 'text' in content0 and content0['text']:
|
|
67
|
+
text = content0['text']
|
|
68
|
+
return text
|
|
69
|
+
|
|
70
|
+
def _extract_text_and_annotations(self, value):
|
|
71
|
+
text = None
|
|
72
|
+
annotations = []
|
|
73
|
+
for output in value.get('output', []):
|
|
74
|
+
if output.get('type') != 'message' or not output.get('content'):
|
|
75
|
+
continue
|
|
76
|
+
for content in output.get('content', []) or []:
|
|
77
|
+
if 'text' in content and content['text']:
|
|
78
|
+
text = content['text']
|
|
79
|
+
anns = content.get('annotations', []) or []
|
|
80
|
+
for ann in anns:
|
|
81
|
+
if ann.get('type') == 'url_citation':
|
|
82
|
+
annotations.append(ann)
|
|
83
|
+
return text, annotations
|
|
84
|
+
|
|
85
|
+
def _normalize_url(self, u: str) -> str:
|
|
86
|
+
parts = urlsplit(u)
|
|
87
|
+
scheme = parts.scheme.lower()
|
|
88
|
+
netloc = parts.netloc.lower()
|
|
89
|
+
path = parts.path.rstrip('/') or '/'
|
|
90
|
+
q = []
|
|
91
|
+
for k, v in parse_qsl(parts.query, keep_blank_values=True):
|
|
92
|
+
kl = k.lower()
|
|
93
|
+
if kl in TRACKING_KEYS or kl.startswith('utm_'):
|
|
94
|
+
continue
|
|
95
|
+
q.append((k, v))
|
|
96
|
+
query = urlencode(q, doseq=True)
|
|
97
|
+
fragment = ''
|
|
98
|
+
return urlunsplit((scheme, netloc, path, query, fragment))
|
|
99
|
+
|
|
100
|
+
def _make_title_map(self, annotations):
|
|
101
|
+
m = {}
|
|
102
|
+
for a in annotations or []:
|
|
103
|
+
url = a.get('url')
|
|
104
|
+
if not url:
|
|
105
|
+
continue
|
|
106
|
+
nu = self._normalize_url(url)
|
|
107
|
+
title = (a.get('title') or '').strip()
|
|
108
|
+
if nu not in m and title:
|
|
109
|
+
m[nu] = title
|
|
110
|
+
return m
|
|
111
|
+
|
|
112
|
+
def _hostname(self, u: str) -> str:
|
|
113
|
+
return urlsplit(u).netloc
|
|
114
|
+
|
|
115
|
+
def _short_hash_id(self, nu: str, length=6) -> str:
|
|
116
|
+
return hashlib.sha1(nu.encode('utf-8')).hexdigest()[:length]
|
|
117
|
+
|
|
118
|
+
def _replace_links_with_citations(self, text: str, annotations, id_mode: str = 'sequential'):
|
|
119
|
+
title_map = self._make_title_map(annotations)
|
|
120
|
+
id_map = {}
|
|
121
|
+
ordered = [] # list of ("[n]", title, normalized_url)
|
|
122
|
+
next_id = 1
|
|
123
|
+
|
|
124
|
+
pattern = re.compile(r"\[([^\]]*?)\]\((https?://[^\s)]+)\)")
|
|
125
|
+
|
|
126
|
+
def _get_id(nu: str) -> str:
|
|
127
|
+
nonlocal next_id
|
|
128
|
+
if id_mode == 'hash':
|
|
129
|
+
return self._short_hash_id(nu)
|
|
130
|
+
if nu not in id_map:
|
|
131
|
+
id_map[nu] = str(next_id)
|
|
132
|
+
t = title_map.get(nu) or self._hostname(nu)
|
|
133
|
+
ordered.append((f"[{id_map[nu]}]", t, nu))
|
|
134
|
+
next_id += 1
|
|
135
|
+
return id_map[nu]
|
|
136
|
+
|
|
137
|
+
def _repl(m):
|
|
138
|
+
link_text, url = m.group(1), m.group(2)
|
|
139
|
+
nu = self._normalize_url(url)
|
|
140
|
+
cid = _get_id(nu)
|
|
141
|
+
title = title_map.get(nu)
|
|
142
|
+
if not title:
|
|
143
|
+
lt = (link_text or '').strip()
|
|
144
|
+
title = lt if (' ' in lt) else self._hostname(nu)
|
|
145
|
+
return f"[{cid}] ({title})"
|
|
146
|
+
|
|
147
|
+
replaced = pattern.sub(_repl, text)
|
|
148
|
+
return replaced, ordered
|
|
149
|
+
|
|
63
150
|
def __str__(self) -> str:
|
|
64
151
|
try:
|
|
65
152
|
return json.dumps(self.raw, indent=2)
|
|
@@ -86,6 +173,10 @@ class GPTXSearchEngine(Engine):
|
|
|
86
173
|
self.api_key = self.config.get('SEARCH_ENGINE_API_KEY')
|
|
87
174
|
self.model = self.config.get('SEARCH_ENGINE_MODEL', 'gpt-4.1') # Default to gpt-4.1 as per docs
|
|
88
175
|
self.name = self.__class__.__name__
|
|
176
|
+
try:
|
|
177
|
+
self.client = OpenAI(api_key=self.api_key)
|
|
178
|
+
except Exception as e:
|
|
179
|
+
CustomUserWarning(f"Failed to initialize OpenAI client: {e}", raise_with=ValueError)
|
|
89
180
|
|
|
90
181
|
def id(self) -> str:
|
|
91
182
|
if self.config.get('SEARCH_ENGINE_API_KEY') and \
|
|
@@ -117,19 +208,12 @@ class GPTXSearchEngine(Engine):
|
|
|
117
208
|
"model": self.model,
|
|
118
209
|
"input": messages,
|
|
119
210
|
"tools": [tool_definition],
|
|
120
|
-
"tool_choice": {"type": "web_search_preview"} # force the use of web search tool
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
headers = {
|
|
124
|
-
"Authorization": f"Bearer {self.api_key}",
|
|
125
|
-
"Content-Type": "application/json",
|
|
126
|
-
"OpenAI-Beta": "assistants=v1" # Required for some beta features, might be useful
|
|
211
|
+
"tool_choice": {"type": "web_search_preview"} if self.model not in OPENAI_REASONING_MODELS else "auto" # force the use of web search tool for non-reasoning models
|
|
127
212
|
}
|
|
128
|
-
api_url = "https://api.openai.com/v1/responses"
|
|
129
213
|
|
|
130
214
|
try:
|
|
131
|
-
res =
|
|
132
|
-
res = SearchResult(res.
|
|
215
|
+
res = self.client.responses.create(**payload)
|
|
216
|
+
res = SearchResult(res.dict())
|
|
133
217
|
except Exception as e:
|
|
134
218
|
CustomUserWarning(f"Failed to make request: {e}", raise_with=ValueError)
|
|
135
219
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: symbolicai
|
|
3
|
-
Version: 0.17.5
|
|
3
|
+
Version: 0.17.6
|
|
4
4
|
Summary: A Neurosymbolic Perspective on Large Language Models
|
|
5
5
|
Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
|
|
6
6
|
Project-URL: Homepage, https://extensity.ai
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
symai/TERMS_OF_SERVICE.md,sha256=HN42UXVI_wAVDHjMShzy_k7xAsbjXaATNeMKcIte_eg,91409
|
|
2
|
-
symai/__init__.py,sha256=
|
|
2
|
+
symai/__init__.py,sha256=bL2mJzfktI9IqkdePuW2tDxOFfDNn22bdqM01brcGus,16464
|
|
3
3
|
symai/chat.py,sha256=vqEe7NqSWdzr9ixkko_094SR1LIbgPLcZxQ8W7782N4,12775
|
|
4
4
|
symai/components.py,sha256=L_Kbuyc0JW6c8zkVRd406HzPifLM3ZytRVas6EHE7Ls,50617
|
|
5
5
|
symai/constraints.py,sha256=S1ywLB8nFQy4-beDoJz6IvLTiZHGR8Fu5RNTY4v5zG0,1641
|
|
@@ -55,7 +55,7 @@ symai/backend/engines/neurosymbolic/engine_openai_gptX_completion.py,sha256=YgxR
|
|
|
55
55
|
symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py,sha256=QVbyZybUPSAQHiA66V6we2W2dAsk52g1kJ7kMdGqb9I,22951
|
|
56
56
|
symai/backend/engines/ocr/engine_apilayer.py,sha256=hZo4lk0ECRIzaGEpmCSNjR5Xrh8mwkKMD2ddpdgioVU,2399
|
|
57
57
|
symai/backend/engines/output/engine_stdout.py,sha256=2hhyhMHFJTfjVRaODYd_5XPnV9pT03URcpYbeMY_USU,951
|
|
58
|
-
symai/backend/engines/search/engine_openai.py,sha256=
|
|
58
|
+
symai/backend/engines/search/engine_openai.py,sha256=zARzTr0qO7p8o1TCS441KNIgtZR9-mjdjyICO2ajtVw,8492
|
|
59
59
|
symai/backend/engines/search/engine_perplexity.py,sha256=yxuhGaA38d1FRbLv6piLll0QDxCCyBVK6eeomjYNryM,4157
|
|
60
60
|
symai/backend/engines/search/engine_serpapi.py,sha256=UqvGHs1J9BOv05C0FJUQjbz29_VuWncIkeDwlRPUilU,3698
|
|
61
61
|
symai/backend/engines/speech_to_text/engine_local_whisper.py,sha256=LRsXliCpHDFPFaE-vPky3-DLkmYwmwe2mxfF0Brz4Wg,8220
|
|
@@ -152,8 +152,8 @@ symai/ops/primitives.py,sha256=EaB2Ekx9yGNDaQa3aKS5KpuEr5awAUbO3OcBbufI-l4,11072
|
|
|
152
152
|
symai/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
153
153
|
symai/server/huggingface_server.py,sha256=UpSBflnQaenDjY1AAn5LUYeg5J4gJLWiMuC5DcoIV3E,8743
|
|
154
154
|
symai/server/llama_cpp_server.py,sha256=qVCldTdcQhK2YCu7sDNSYziu1p2AQieqMFfY028-yOc,2049
|
|
155
|
-
symbolicai-0.17.
|
|
156
|
-
symbolicai-0.17.
|
|
157
|
-
symbolicai-0.17.
|
|
158
|
-
symbolicai-0.17.
|
|
159
|
-
symbolicai-0.17.
|
|
155
|
+
symbolicai-0.17.6.dist-info/METADATA,sha256=UQTu70XQcvbzRzoeq8TBdbxbiUnRpG41JzVaR4XJ1j8,21327
|
|
156
|
+
symbolicai-0.17.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
157
|
+
symbolicai-0.17.6.dist-info/entry_points.txt,sha256=JV5sdydIfUZdDF6QBEQHiZHod6XNPjCjpWQrXh7gTAw,261
|
|
158
|
+
symbolicai-0.17.6.dist-info/top_level.txt,sha256=bOoIDfpDIvCQtQgXcwVKJvxAKwsxpxo2IL4z92rNJjw,6
|
|
159
|
+
symbolicai-0.17.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|