symbolicai 0.17.4__py3-none-any.whl → 0.17.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +1 -1
- symai/backend/engines/search/engine_openai.py +117 -33
- symai/models/base.py +58 -18
- {symbolicai-0.17.4.dist-info → symbolicai-0.17.6.dist-info}/METADATA +1 -1
- {symbolicai-0.17.4.dist-info → symbolicai-0.17.6.dist-info}/RECORD +8 -8
- {symbolicai-0.17.4.dist-info → symbolicai-0.17.6.dist-info}/WHEEL +0 -0
- {symbolicai-0.17.4.dist-info → symbolicai-0.17.6.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.17.4.dist-info → symbolicai-0.17.6.dist-info}/top_level.txt +0 -0
symai/__init__.py
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
|
+
import hashlib
|
|
1
2
|
import json
|
|
2
3
|
import logging
|
|
3
|
-
import
|
|
4
|
+
import re
|
|
4
5
|
from copy import deepcopy
|
|
5
6
|
from dataclasses import dataclass
|
|
7
|
+
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
|
|
8
|
+
|
|
9
|
+
from openai import OpenAI
|
|
6
10
|
|
|
7
11
|
from ....symbol import Result
|
|
8
12
|
from ....utils import CustomUserWarning
|
|
9
13
|
from ...base import Engine
|
|
10
|
-
from ...settings import SYMAI_CONFIG
|
|
11
14
|
from ...mixin import OPENAI_CHAT_MODELS, OPENAI_REASONING_MODELS
|
|
15
|
+
from ...settings import SYMAI_CONFIG
|
|
12
16
|
|
|
13
17
|
logging.getLogger("requests").setLevel(logging.ERROR)
|
|
14
18
|
logging.getLogger("urllib3").setLevel(logging.ERROR)
|
|
@@ -16,6 +20,10 @@ logging.getLogger("httpx").setLevel(logging.ERROR)
|
|
|
16
20
|
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
|
17
21
|
|
|
18
22
|
|
|
23
|
+
# Query-parameter names that are dropped when a URL is normalized
# (consulted together with the generic "utm_" prefix check).
TRACKING_KEYS = {"utm_source"}  # so far I've only seen this one
|
|
26
|
+
|
|
19
27
|
@dataclass
|
|
20
28
|
class Citation:
|
|
21
29
|
id: str
|
|
@@ -34,32 +42,111 @@ class SearchResult(Result):
|
|
|
34
42
|
if value.get('error'):
|
|
35
43
|
CustomUserWarning(value['error'], raise_with=ValueError)
|
|
36
44
|
try:
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
url=annotation.get('url', ''),
|
|
49
|
-
)
|
|
50
|
-
if citation not in citations:
|
|
51
|
-
citations.append(citation)
|
|
52
|
-
self._value = output['content'][0]['text']
|
|
53
|
-
delta = 0
|
|
54
|
-
for citation in citations:
|
|
55
|
-
self._value = self._value[:citation.start - delta] + citation.id + self._value[citation.end - delta:]
|
|
56
|
-
delta += (citation.end - citation.start) - len(citation.id)
|
|
57
|
-
self._citations = citations
|
|
45
|
+
text, annotations = self._extract_text_and_annotations(value)
|
|
46
|
+
if text is None:
|
|
47
|
+
self._value = None
|
|
48
|
+
self._citations = []
|
|
49
|
+
return
|
|
50
|
+
replaced_text, ordered = self._replace_links_with_citations(text, annotations, id_mode="sequential")
|
|
51
|
+
self._value = replaced_text
|
|
52
|
+
self._citations = [
|
|
53
|
+
Citation(id=cid, title=title, url=url, start=0, end=0)
|
|
54
|
+
for cid, title, url in ordered
|
|
55
|
+
]
|
|
58
56
|
|
|
59
57
|
except Exception as e:
|
|
60
58
|
self._value = None
|
|
61
59
|
CustomUserWarning(f"Failed to parse response: {e}", raise_with=ValueError)
|
|
62
60
|
|
|
61
|
+
def _extract_text(self, value) -> str | None:
|
|
62
|
+
text = None
|
|
63
|
+
for output in value.get('output', []):
|
|
64
|
+
if output.get('type') == 'message' and output.get('content'):
|
|
65
|
+
content0 = output['content'][0]
|
|
66
|
+
if 'text' in content0 and content0['text']:
|
|
67
|
+
text = content0['text']
|
|
68
|
+
return text
|
|
69
|
+
|
|
70
|
+
def _extract_text_and_annotations(self, value):
|
|
71
|
+
text = None
|
|
72
|
+
annotations = []
|
|
73
|
+
for output in value.get('output', []):
|
|
74
|
+
if output.get('type') != 'message' or not output.get('content'):
|
|
75
|
+
continue
|
|
76
|
+
for content in output.get('content', []) or []:
|
|
77
|
+
if 'text' in content and content['text']:
|
|
78
|
+
text = content['text']
|
|
79
|
+
anns = content.get('annotations', []) or []
|
|
80
|
+
for ann in anns:
|
|
81
|
+
if ann.get('type') == 'url_citation':
|
|
82
|
+
annotations.append(ann)
|
|
83
|
+
return text, annotations
|
|
84
|
+
|
|
85
|
+
def _normalize_url(self, u: str) -> str:
|
|
86
|
+
parts = urlsplit(u)
|
|
87
|
+
scheme = parts.scheme.lower()
|
|
88
|
+
netloc = parts.netloc.lower()
|
|
89
|
+
path = parts.path.rstrip('/') or '/'
|
|
90
|
+
q = []
|
|
91
|
+
for k, v in parse_qsl(parts.query, keep_blank_values=True):
|
|
92
|
+
kl = k.lower()
|
|
93
|
+
if kl in TRACKING_KEYS or kl.startswith('utm_'):
|
|
94
|
+
continue
|
|
95
|
+
q.append((k, v))
|
|
96
|
+
query = urlencode(q, doseq=True)
|
|
97
|
+
fragment = ''
|
|
98
|
+
return urlunsplit((scheme, netloc, path, query, fragment))
|
|
99
|
+
|
|
100
|
+
def _make_title_map(self, annotations):
|
|
101
|
+
m = {}
|
|
102
|
+
for a in annotations or []:
|
|
103
|
+
url = a.get('url')
|
|
104
|
+
if not url:
|
|
105
|
+
continue
|
|
106
|
+
nu = self._normalize_url(url)
|
|
107
|
+
title = (a.get('title') or '').strip()
|
|
108
|
+
if nu not in m and title:
|
|
109
|
+
m[nu] = title
|
|
110
|
+
return m
|
|
111
|
+
|
|
112
|
+
def _hostname(self, u: str) -> str:
|
|
113
|
+
return urlsplit(u).netloc
|
|
114
|
+
|
|
115
|
+
def _short_hash_id(self, nu: str, length=6) -> str:
|
|
116
|
+
return hashlib.sha1(nu.encode('utf-8')).hexdigest()[:length]
|
|
117
|
+
|
|
118
|
+
def _replace_links_with_citations(self, text: str, annotations, id_mode: str = 'sequential'):
    """Rewrite markdown links in *text* as citation markers.

    Every ``[label](http(s)://url)`` occurrence is replaced by
    ``[<id>] (<title>)``. URLs are compared after ``self._normalize_url``,
    so links that differ only in ways the normalization removes share one id.

    Args:
        text: Text that may contain markdown links.
        annotations: Annotation dicts passed to ``self._make_title_map`` to
            look up a title per normalized URL.
        id_mode: ``'hash'`` derives the id from ``self._short_hash_id`` of
            the normalized URL; any other value numbers URLs 1, 2, 3, ... in
            order of first appearance.

    Returns:
        ``(replaced_text, ordered)`` where ``ordered`` holds one
        ``("[<id>]", title, normalized_url)`` tuple per distinct normalized
        URL, in order of first appearance.
    """
    title_map = self._make_title_map(annotations)
    id_map = {}
    ordered = []  # list of ("[id]", title, normalized_url)

    pattern = re.compile(r"\[([^\]]*?)\]\((https?://[^\s)]+)\)")

    def _get_id(nu: str) -> str:
        # Record each URL the first time it is seen, in BOTH id modes.
        # Previously the 'hash' branch returned early, so ``ordered`` stayed
        # empty and hash-mode callers got no citation list at all.
        if nu not in id_map:
            if id_mode == 'hash':
                id_map[nu] = self._short_hash_id(nu)
            else:
                id_map[nu] = str(len(id_map) + 1)
            t = title_map.get(nu) or self._hostname(nu)
            ordered.append((f"[{id_map[nu]}]", t, nu))
        return id_map[nu]

    def _repl(m):
        link_text, url = m.group(1), m.group(2)
        nu = self._normalize_url(url)
        cid = _get_id(nu)
        title = title_map.get(nu)
        if not title:
            # Fall back to the link label only when it contains a space;
            # otherwise use the URL's netloc.
            lt = (link_text or '').strip()
            title = lt if (' ' in lt) else self._hostname(nu)
        return f"[{cid}] ({title})"

    replaced = pattern.sub(_repl, text)
    return replaced, ordered
|
|
149
|
+
|
|
63
150
|
def __str__(self) -> str:
|
|
64
151
|
try:
|
|
65
152
|
return json.dumps(self.raw, indent=2)
|
|
@@ -86,6 +173,10 @@ class GPTXSearchEngine(Engine):
|
|
|
86
173
|
self.api_key = self.config.get('SEARCH_ENGINE_API_KEY')
|
|
87
174
|
self.model = self.config.get('SEARCH_ENGINE_MODEL', 'gpt-4.1') # Default to gpt-4.1 as per docs
|
|
88
175
|
self.name = self.__class__.__name__
|
|
176
|
+
try:
|
|
177
|
+
self.client = OpenAI(api_key=self.api_key)
|
|
178
|
+
except Exception as e:
|
|
179
|
+
CustomUserWarning(f"Failed to initialize OpenAI client: {e}", raise_with=ValueError)
|
|
89
180
|
|
|
90
181
|
def id(self) -> str:
|
|
91
182
|
if self.config.get('SEARCH_ENGINE_API_KEY') and \
|
|
@@ -117,19 +208,12 @@ class GPTXSearchEngine(Engine):
|
|
|
117
208
|
"model": self.model,
|
|
118
209
|
"input": messages,
|
|
119
210
|
"tools": [tool_definition],
|
|
120
|
-
"tool_choice": {"type": "web_search_preview"} # force the use of web search tool
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
headers = {
|
|
124
|
-
"Authorization": f"Bearer {self.api_key}",
|
|
125
|
-
"Content-Type": "application/json",
|
|
126
|
-
"OpenAI-Beta": "assistants=v1" # Required for some beta features, might be useful
|
|
211
|
+
"tool_choice": {"type": "web_search_preview"} if self.model not in OPENAI_REASONING_MODELS else "auto" # force the use of web search tool for non-reasoning models
|
|
127
212
|
}
|
|
128
|
-
api_url = "https://api.openai.com/v1/responses"
|
|
129
213
|
|
|
130
214
|
try:
|
|
131
|
-
res =
|
|
132
|
-
res = SearchResult(res.
|
|
215
|
+
res = self.client.responses.create(**payload)
|
|
216
|
+
res = SearchResult(res.dict())
|
|
133
217
|
except Exception as e:
|
|
134
218
|
CustomUserWarning(f"Failed to make request: {e}", raise_with=ValueError)
|
|
135
219
|
|
symai/models/base.py
CHANGED
|
@@ -74,8 +74,6 @@ class LLMDataModel(BaseModel):
|
|
|
74
74
|
origin = get_origin(field_type)
|
|
75
75
|
return origin in (list, set, frozenset, tuple, dict) or field_type in (list, set, frozenset, tuple, dict)
|
|
76
76
|
|
|
77
|
-
|
|
78
|
-
|
|
79
77
|
@staticmethod
|
|
80
78
|
def _is_const_field(field_info) -> bool:
|
|
81
79
|
"""Check if a field is a const field."""
|
|
@@ -94,8 +92,6 @@ class LLMDataModel(BaseModel):
|
|
|
94
92
|
"""Check if a field has a default value."""
|
|
95
93
|
return field_info.default != ... and field_info.default != PydanticUndefined
|
|
96
94
|
|
|
97
|
-
|
|
98
|
-
|
|
99
95
|
def format_field(self, key: str, value: Any, indent: int = 0, visited: set = None, depth: int = 0) -> str:
|
|
100
96
|
"""Formats a field value for string representation, handling nested structures."""
|
|
101
97
|
visited = visited or set()
|
|
@@ -247,7 +243,7 @@ class LLMDataModel(BaseModel):
|
|
|
247
243
|
definitions = cls._extract_schema_definitions(schema)
|
|
248
244
|
|
|
249
245
|
main_schema = cls._format_schema_fields(properties, schema, definitions, 0)
|
|
250
|
-
definitions_schema = cls._format_schema_definitions(definitions)
|
|
246
|
+
definitions_schema = cls._format_schema_definitions(definitions, schema)
|
|
251
247
|
|
|
252
248
|
return cls._compose_schema_output(main_schema, definitions_schema)
|
|
253
249
|
|
|
@@ -458,11 +454,65 @@ class LLMDataModel(BaseModel):
|
|
|
458
454
|
return f"nested object ({ref_name})"
|
|
459
455
|
|
|
460
456
|
@classmethod
|
|
461
|
-
def _format_schema_definitions(cls, definitions: dict) -> str:
|
|
462
|
-
"""Format schema definitions using descriptions
|
|
457
|
+
def _format_schema_definitions(cls, definitions: dict, root_schema: dict | None = None) -> str:
|
|
458
|
+
"""Format schema definitions using descriptions and examples; omit redundant types.
|
|
459
|
+
|
|
460
|
+
Also includes the root model's fields (from root_schema) so their descriptions/examples
|
|
461
|
+
are visible, not just $defs.
|
|
462
|
+
"""
|
|
463
463
|
lines = []
|
|
464
464
|
visited_defs = set()
|
|
465
465
|
|
|
466
|
+
def _format_definition_properties(props: dict) -> list[str]:
|
|
467
|
+
"""Render property lines using only Field(description=...), with const/excerpts.
|
|
468
|
+
|
|
469
|
+
Always lists properties; if description is missing, emit a generic guidance message.
|
|
470
|
+
"""
|
|
471
|
+
out: list[str] = []
|
|
472
|
+
def _fmt_example_value(val):
|
|
473
|
+
if isinstance(val, str):
|
|
474
|
+
return val
|
|
475
|
+
try:
|
|
476
|
+
return json.dumps(val, ensure_ascii=False)
|
|
477
|
+
except Exception:
|
|
478
|
+
return str(val)
|
|
479
|
+
for prop_name, prop_schema in props.items():
|
|
480
|
+
if prop_name == "section_header":
|
|
481
|
+
continue
|
|
482
|
+
desc = prop_schema.get("description")
|
|
483
|
+
const_note = ""
|
|
484
|
+
if "const_value" in prop_schema:
|
|
485
|
+
const_note = f' (const value: "{prop_schema["const_value"]}")'
|
|
486
|
+
if not desc:
|
|
487
|
+
out.append(
|
|
488
|
+
f' - "{prop_name}": '
|
|
489
|
+
'No definition provided. Focus on the [[Schema]] and the prompt to infer '
|
|
490
|
+
'the expected structure and constraints.'
|
|
491
|
+
)
|
|
492
|
+
else:
|
|
493
|
+
out.append(f' - "{prop_name}": {desc}{const_note}')
|
|
494
|
+
|
|
495
|
+
examples = prop_schema.get("examples")
|
|
496
|
+
if examples is None and "example" in prop_schema:
|
|
497
|
+
examples = prop_schema.get("example")
|
|
498
|
+
|
|
499
|
+
if isinstance(examples, (list, tuple)):
|
|
500
|
+
if len(examples) > 0:
|
|
501
|
+
out.append(" - Examples:")
|
|
502
|
+
for ex in examples:
|
|
503
|
+
out.append(f" - {_fmt_example_value(ex)}")
|
|
504
|
+
elif examples is not None:
|
|
505
|
+
out.append(f" - Example: {_fmt_example_value(examples)}")
|
|
506
|
+
return out
|
|
507
|
+
|
|
508
|
+
# Include root model's fields in Definitions (for descriptions/examples)
|
|
509
|
+
if root_schema and isinstance(root_schema, dict):
|
|
510
|
+
root_title = root_schema.get("title", "Root")
|
|
511
|
+
root_props = cls._extract_schema_properties(root_schema)
|
|
512
|
+
if root_props:
|
|
513
|
+
lines.append(f"- {root_title}:")
|
|
514
|
+
lines.extend(_format_definition_properties(root_props))
|
|
515
|
+
|
|
466
516
|
for name, definition in definitions.items():
|
|
467
517
|
if name in visited_defs:
|
|
468
518
|
continue
|
|
@@ -475,17 +525,7 @@ class LLMDataModel(BaseModel):
|
|
|
475
525
|
continue
|
|
476
526
|
|
|
477
527
|
props = definition.get("properties", {})
|
|
478
|
-
|
|
479
|
-
if prop_name == "section_header":
|
|
480
|
-
continue
|
|
481
|
-
desc = prop_schema.get("description") or prop_schema.get("title")
|
|
482
|
-
if "const_value" in prop_schema:
|
|
483
|
-
const_value = prop_schema["const_value"]
|
|
484
|
-
const_note = f' (const value: "{const_value}")'
|
|
485
|
-
else:
|
|
486
|
-
const_note = ""
|
|
487
|
-
if desc:
|
|
488
|
-
lines.append(f' - "{prop_name}": {desc}{const_note}')
|
|
528
|
+
lines.extend(_format_definition_properties(props))
|
|
489
529
|
|
|
490
530
|
return "\n".join(lines)
|
|
491
531
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: symbolicai
|
|
3
|
-
Version: 0.17.
|
|
3
|
+
Version: 0.17.6
|
|
4
4
|
Summary: A Neurosymbolic Perspective on Large Language Models
|
|
5
5
|
Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
|
|
6
6
|
Project-URL: Homepage, https://extensity.ai
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
symai/TERMS_OF_SERVICE.md,sha256=HN42UXVI_wAVDHjMShzy_k7xAsbjXaATNeMKcIte_eg,91409
|
|
2
|
-
symai/__init__.py,sha256=
|
|
2
|
+
symai/__init__.py,sha256=bL2mJzfktI9IqkdePuW2tDxOFfDNn22bdqM01brcGus,16464
|
|
3
3
|
symai/chat.py,sha256=vqEe7NqSWdzr9ixkko_094SR1LIbgPLcZxQ8W7782N4,12775
|
|
4
4
|
symai/components.py,sha256=L_Kbuyc0JW6c8zkVRd406HzPifLM3ZytRVas6EHE7Ls,50617
|
|
5
5
|
symai/constraints.py,sha256=S1ywLB8nFQy4-beDoJz6IvLTiZHGR8Fu5RNTY4v5zG0,1641
|
|
@@ -55,7 +55,7 @@ symai/backend/engines/neurosymbolic/engine_openai_gptX_completion.py,sha256=YgxR
|
|
|
55
55
|
symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py,sha256=QVbyZybUPSAQHiA66V6we2W2dAsk52g1kJ7kMdGqb9I,22951
|
|
56
56
|
symai/backend/engines/ocr/engine_apilayer.py,sha256=hZo4lk0ECRIzaGEpmCSNjR5Xrh8mwkKMD2ddpdgioVU,2399
|
|
57
57
|
symai/backend/engines/output/engine_stdout.py,sha256=2hhyhMHFJTfjVRaODYd_5XPnV9pT03URcpYbeMY_USU,951
|
|
58
|
-
symai/backend/engines/search/engine_openai.py,sha256=
|
|
58
|
+
symai/backend/engines/search/engine_openai.py,sha256=zARzTr0qO7p8o1TCS441KNIgtZR9-mjdjyICO2ajtVw,8492
|
|
59
59
|
symai/backend/engines/search/engine_perplexity.py,sha256=yxuhGaA38d1FRbLv6piLll0QDxCCyBVK6eeomjYNryM,4157
|
|
60
60
|
symai/backend/engines/search/engine_serpapi.py,sha256=UqvGHs1J9BOv05C0FJUQjbz29_VuWncIkeDwlRPUilU,3698
|
|
61
61
|
symai/backend/engines/speech_to_text/engine_local_whisper.py,sha256=LRsXliCpHDFPFaE-vPky3-DLkmYwmwe2mxfF0Brz4Wg,8220
|
|
@@ -144,7 +144,7 @@ symai/misc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
144
144
|
symai/misc/console.py,sha256=bo8uTI0RpjUPARwVXaVrcbbm1RbmUU2cBkYnZmHiTwY,2892
|
|
145
145
|
symai/misc/loader.py,sha256=7lyIMIvU6Ywo_xt-TM8Xqhc6W4tY67U5XzxSkuAYZi8,1635
|
|
146
146
|
symai/models/__init__.py,sha256=QCYmMOhHk3t6HhTApBMxIeS3dX4_bKfHNr6a9LzRa8s,163
|
|
147
|
-
symai/models/base.py,sha256=
|
|
147
|
+
symai/models/base.py,sha256=lnkcCwJfv_Yg5kiLRazbVq9jIRKhNR75W8_S9NBVmMo,40881
|
|
148
148
|
symai/models/errors.py,sha256=wErnNZxy6Px8D2xQKOS3cco3C_KiBn6ovX2oBUc0YmA,941
|
|
149
149
|
symai/ops/__init__.py,sha256=FSzZrw7ZYom51rUVrFWdj440wN_3UHv8tfgZo7sVxCU,633
|
|
150
150
|
symai/ops/measures.py,sha256=ENoY92LUlpGA05hEz7pf42FW5cv-xH2wEQU3IRMqoUU,3679
|
|
@@ -152,8 +152,8 @@ symai/ops/primitives.py,sha256=EaB2Ekx9yGNDaQa3aKS5KpuEr5awAUbO3OcBbufI-l4,11072
|
|
|
152
152
|
symai/server/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
153
153
|
symai/server/huggingface_server.py,sha256=UpSBflnQaenDjY1AAn5LUYeg5J4gJLWiMuC5DcoIV3E,8743
|
|
154
154
|
symai/server/llama_cpp_server.py,sha256=qVCldTdcQhK2YCu7sDNSYziu1p2AQieqMFfY028-yOc,2049
|
|
155
|
-
symbolicai-0.17.
|
|
156
|
-
symbolicai-0.17.
|
|
157
|
-
symbolicai-0.17.
|
|
158
|
-
symbolicai-0.17.
|
|
159
|
-
symbolicai-0.17.
|
|
155
|
+
symbolicai-0.17.6.dist-info/METADATA,sha256=UQTu70XQcvbzRzoeq8TBdbxbiUnRpG41JzVaR4XJ1j8,21327
|
|
156
|
+
symbolicai-0.17.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
157
|
+
symbolicai-0.17.6.dist-info/entry_points.txt,sha256=JV5sdydIfUZdDF6QBEQHiZHod6XNPjCjpWQrXh7gTAw,261
|
|
158
|
+
symbolicai-0.17.6.dist-info/top_level.txt,sha256=bOoIDfpDIvCQtQgXcwVKJvxAKwsxpxo2IL4z92rNJjw,6
|
|
159
|
+
symbolicai-0.17.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|