symbolicai 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- symai/__init__.py +21 -71
- symai/backend/base.py +0 -26
- symai/backend/engines/drawing/engine_gemini_image.py +101 -0
- symai/backend/engines/embedding/engine_openai.py +11 -8
- symai/backend/engines/neurosymbolic/__init__.py +8 -0
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +14 -1
- symai/backend/engines/neurosymbolic/engine_openrouter.py +294 -0
- symai/backend/engines/scrape/engine_requests.py +39 -10
- symai/backend/engines/search/__init__.py +13 -0
- symai/backend/engines/search/engine_firecrawl.py +333 -0
- symai/backend/engines/search/engine_parallel.py +5 -5
- symai/backend/mixin/__init__.py +4 -0
- symai/backend/mixin/openrouter.py +2 -0
- symai/components.py +212 -16
- symai/extended/interfaces/firecrawl.py +30 -0
- symai/extended/interfaces/nanobanana.py +23 -0
- symai/extended/interfaces/parallel.py +5 -5
- symai/functional.py +3 -4
- symai/interfaces.py +2 -0
- symai/ops/primitives.py +0 -18
- symai/shellsv.py +2 -7
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/METADATA +3 -9
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/RECORD +27 -47
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/WHEEL +1 -1
- symai/backend/driver/webclient.py +0 -217
- symai/backend/engines/crawler/engine_selenium.py +0 -94
- symai/backend/engines/drawing/engine_dall_e.py +0 -131
- symai/backend/engines/embedding/engine_plugin_embeddings.py +0 -12
- symai/backend/engines/experiments/engine_bard_wrapper.py +0 -131
- symai/backend/engines/experiments/engine_gptfinetuner.py +0 -32
- symai/backend/engines/experiments/engine_llamacpp_completion.py +0 -142
- symai/backend/engines/neurosymbolic/engine_openai_gptX_completion.py +0 -277
- symai/collect/__init__.py +0 -8
- symai/collect/dynamic.py +0 -117
- symai/collect/pipeline.py +0 -156
- symai/collect/stats.py +0 -434
- symai/extended/crawler.py +0 -21
- symai/extended/interfaces/selenium.py +0 -18
- symai/extended/interfaces/vectordb.py +0 -21
- symai/extended/personas/__init__.py +0 -3
- symai/extended/personas/builder.py +0 -105
- symai/extended/personas/dialogue.py +0 -126
- symai/extended/personas/persona.py +0 -154
- symai/extended/personas/research/__init__.py +0 -1
- symai/extended/personas/research/yann_lecun.py +0 -62
- symai/extended/personas/sales/__init__.py +0 -1
- symai/extended/personas/sales/erik_james.py +0 -62
- symai/extended/personas/student/__init__.py +0 -1
- symai/extended/personas/student/max_tenner.py +0 -51
- symai/extended/strategies/__init__.py +0 -1
- symai/extended/strategies/cot.py +0 -40
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/top_level.txt +0 -0
symai/backend/engines/search/engine_firecrawl.py
ADDED

@@ -0,0 +1,333 @@
+import json
+import logging
+from copy import deepcopy
+from dataclasses import dataclass
+from typing import Any
+from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
+
+from firecrawl import Firecrawl
+from firecrawl.v2.types import ScrapeOptions
+
+from ....symbol import Result
+from ....utils import UserMessage
+from ...base import Engine
+from ...settings import SYMAI_CONFIG
+
+logging.getLogger("requests").setLevel(logging.ERROR)
+logging.getLogger("urllib3").setLevel(logging.ERROR)
+logging.getLogger("httpx").setLevel(logging.ERROR)
+
+TRACKING_KEYS = {
+    "utm_source",
+    "utm_medium",
+    "utm_campaign",
+    "utm_term",
+    "utm_content",
+}
+
+
+@dataclass
+class Citation:
+    id: int
+    title: str
+    url: str
+    start: int
+    end: int
+
+    def __hash__(self):
+        return hash((self.url,))
+
+
+class FirecrawlSearchResult(Result):
+    def __init__(
+        self, value: dict[str, Any] | Any, max_chars_per_result: int | None = None, **kwargs
+    ) -> None:
+        raw_dict = value.model_dump() if hasattr(value, "model_dump") else value
+        super().__init__(raw_dict, **kwargs)
+        self._citations: list[Citation] = []
+        self._max_chars_per_result = max_chars_per_result
+        try:
+            text, citations = self._build_text_and_citations(raw_dict)
+            self._value = text
+            self._citations = citations
+        except Exception as e:
+            self._value = None
+            UserMessage(f"Failed to parse Firecrawl search response: {e}", raise_with=ValueError)
+
+    def _build_text_and_citations(self, data: dict[str, Any]) -> tuple[str, list[Citation]]:
+        results = []
+        for source in ["web", "news", "images"]:
+            source_data = data.get(source) or []
+            results.extend(source_data)
+
+        if not results:
+            return "", []
+
+        parts = []
+        citations = []
+        cursor = 0
+
+        for idx, item in enumerate(results, 1):
+            # Handle both SearchResultWeb (url/title at top level) and Document (url/title in metadata)
+            metadata = item.get("metadata") or {}
+            url = item.get("url") or metadata.get("url") or metadata.get("source_url") or ""
+            title = item.get("title") or metadata.get("title") or ""
+
+            if not url:
+                continue
+
+            # Check if this is a scraped result (has markdown content)
+            markdown = item.get("markdown", "")
+            if markdown:
+                content = markdown
+                if self._max_chars_per_result and len(content) > self._max_chars_per_result:
+                    content = content[: self._max_chars_per_result] + "..."
+                result_text = f"{title}\n{url}\n{content}"
+            else:
+                description = (
+                    item.get("description")
+                    or item.get("snippet")
+                    or metadata.get("description")
+                    or ""
+                )
+                result_text = f"{title}\n{url}"
+                if description:
+                    if self._max_chars_per_result and len(description) > self._max_chars_per_result:
+                        description = description[: self._max_chars_per_result] + "..."
+                    result_text += f"\n{description}"
+
+            if parts:
+                parts.append("\n\n")
+                cursor += 2
+
+            parts.append(result_text)
+            cursor += len(result_text)
+
+            marker = f"[{idx}]"
+            start = cursor
+            parts.append(marker)
+            cursor += len(marker)
+
+            citations.append(Citation(id=idx, title=title, url=url, start=start, end=cursor))
+
+        text = "".join(parts)
+        return text, citations
+
+    def __str__(self) -> str:
+        if isinstance(self._value, str) and self._value:
+            return self._value
+        try:
+            return json.dumps(self.raw, indent=2)
+        except TypeError:
+            return str(self.raw)
+
+    def _repr_html_(self) -> str:
+        if isinstance(self._value, str) and self._value:
+            return f"<pre>{self._value}</pre>"
+        try:
+            return f"<pre>{json.dumps(self.raw, indent=2)}</pre>"
+        except Exception:
+            return f"<pre>{self.raw!s}</pre>"
+
+    def get_citations(self) -> list[Citation]:
+        return self._citations
+
+
+class FirecrawlExtractResult(Result):
+    """Result wrapper for Firecrawl scrape API responses."""
+
+    def __init__(self, value: Any, **kwargs) -> None:
+        raw_dict = value.model_dump() if hasattr(value, "model_dump") else value
+        super().__init__(raw_dict, **kwargs)
+        try:
+            self._value = self._extract_content(raw_dict)
+        except Exception as e:
+            self._value = None
+            UserMessage(f"Failed to parse Firecrawl scrape response: {e}", raise_with=ValueError)
+
+    def _extract_content(self, data: dict[str, Any]) -> str:
+        content = data.get("markdown") or data.get("html") or data.get("raw_html")
+        if content:
+            return str(content)
+        json_data = data.get("json")
+        if json_data:
+            return json.dumps(json_data, indent=2)
+        return ""
+
+    def __str__(self) -> str:
+        try:
+            return str(self._value or "")
+        except Exception:
+            return ""
+
+    def _repr_html_(self) -> str:
+        try:
+            return f"<pre>{self._value or ''}</pre>"
+        except Exception:
+            return "<pre></pre>"
+
+
+class FirecrawlEngine(Engine):
+    def __init__(self, api_key: str | None = None):
+        super().__init__()
+        self.config = deepcopy(SYMAI_CONFIG)
+        self.api_key = api_key or self.config.get("SEARCH_ENGINE_API_KEY")
+        self.model = self.config.get("SEARCH_ENGINE_MODEL")
+        self.name = self.__class__.__name__
+
+        if not self.api_key:
+            UserMessage(
+                "Firecrawl API key not found. Set SEARCH_ENGINE_API_KEY in config or environment.",
+                raise_with=ValueError,
+            )
+
+        try:
+            self.client = Firecrawl(api_key=self.api_key)
+        except Exception as e:
+            UserMessage(f"Failed to initialize Firecrawl client: {e}", raise_with=ValueError)
+
+    def id(self) -> str:
+        if (
+            self.config.get("SEARCH_ENGINE_API_KEY")
+            and str(self.config.get("SEARCH_ENGINE_MODEL", "")).lower() == "firecrawl"
+        ):
+            return "search"
+        return super().id()
+
+    def command(self, *args, **kwargs):
+        super().command(*args, **kwargs)
+        if "SEARCH_ENGINE_API_KEY" in kwargs:
+            self.api_key = kwargs["SEARCH_ENGINE_API_KEY"]
+        if "SEARCH_ENGINE_MODEL" in kwargs:
+            self.model = kwargs["SEARCH_ENGINE_MODEL"]
+
+    def _normalize_url(self, url: str) -> str:
+        parts = urlsplit(url)
+        filtered_query = [
+            (k, v)
+            for k, v in parse_qsl(parts.query, keep_blank_values=True)
+            if k not in TRACKING_KEYS and not k.lower().startswith("utm_")
+        ]
+        query = urlencode(filtered_query, doseq=True)
+        return urlunsplit((parts.scheme, parts.netloc, parts.path, query, parts.fragment))
+
+    def _search(self, query: str, kwargs: dict[str, Any]):
+        if not query:
+            UserMessage(
+                "FirecrawlEngine._search requires a non-empty query.", raise_with=ValueError
+            )
+
+        max_chars_per_result = kwargs.get("max_chars_per_result")
+
+        # Build search kwargs
+        search_kwargs = {}
+        if "limit" in kwargs:
+            search_kwargs["limit"] = kwargs["limit"]
+        if "location" in kwargs:
+            search_kwargs["location"] = kwargs["location"]
+        if "tbs" in kwargs:
+            search_kwargs["tbs"] = kwargs["tbs"]
+        if "sources" in kwargs:
+            search_kwargs["sources"] = kwargs["sources"]
+        if "categories" in kwargs:
+            search_kwargs["categories"] = kwargs["categories"]
+        if "timeout" in kwargs:
+            search_kwargs["timeout"] = kwargs["timeout"]
+
+        # Build scrape options for search results content
+        scrape_opts = {}
+        if "formats" in kwargs:
+            scrape_opts["formats"] = kwargs["formats"]
+        if "proxy" in kwargs:
+            scrape_opts["proxy"] = kwargs["proxy"]
+        if "only_main_content" in kwargs:
+            scrape_opts["only_main_content"] = kwargs["only_main_content"]
+        if "scrape_location" in kwargs:
+            scrape_opts["location"] = kwargs["scrape_location"]
+        if "include_tags" in kwargs:
+            scrape_opts["include_tags"] = kwargs["include_tags"]
+        if "exclude_tags" in kwargs:
+            scrape_opts["exclude_tags"] = kwargs["exclude_tags"]
+
+        if scrape_opts:
+            search_kwargs["scrape_options"] = ScrapeOptions(**scrape_opts)
+
+        try:
+            result = self.client.search(query, **search_kwargs)
+        except Exception as e:
+            UserMessage(f"Failed to call Firecrawl Search API: {e}", raise_with=ValueError)
+
+        raw = result.model_dump() if hasattr(result, "model_dump") else result
+        return [FirecrawlSearchResult(result, max_chars_per_result=max_chars_per_result)], {
+            "raw_output": raw
+        }
+
+    def _extract(self, url: str, kwargs: dict[str, Any]):
+        normalized_url = self._normalize_url(url)
+
+        # Build scrape kwargs
+        scrape_kwargs = {"formats": kwargs.get("formats", ["markdown"])}
+        if "only_main_content" in kwargs:
+            scrape_kwargs["only_main_content"] = kwargs["only_main_content"]
+        if "timeout" in kwargs:
+            scrape_kwargs["timeout"] = kwargs["timeout"]
+        if "proxy" in kwargs:
+            scrape_kwargs["proxy"] = kwargs["proxy"]
+        if "location" in kwargs:
+            scrape_kwargs["location"] = kwargs["location"]
+        if "max_age" in kwargs:
+            scrape_kwargs["max_age"] = kwargs["max_age"]
+        if "store_in_cache" in kwargs:
+            scrape_kwargs["store_in_cache"] = kwargs["store_in_cache"]
+        if "actions" in kwargs:
+            scrape_kwargs["actions"] = kwargs["actions"]
+        if "headers" in kwargs:
+            scrape_kwargs["headers"] = kwargs["headers"]
+        if "include_tags" in kwargs:
+            scrape_kwargs["include_tags"] = kwargs["include_tags"]
+        if "exclude_tags" in kwargs:
+            scrape_kwargs["exclude_tags"] = kwargs["exclude_tags"]
+        if "wait_for" in kwargs:
+            scrape_kwargs["wait_for"] = kwargs["wait_for"]
+        if "mobile" in kwargs:
+            scrape_kwargs["mobile"] = kwargs["mobile"]
+
+        try:
+            result = self.client.scrape(normalized_url, **scrape_kwargs)
+        except Exception as e:
+            UserMessage(f"Failed to call Firecrawl Scrape API: {e}", raise_with=ValueError)
+
+        raw = result.model_dump() if hasattr(result, "model_dump") else result
+        return [FirecrawlExtractResult(result)], {"raw_output": raw, "final_url": normalized_url}
+
+    def forward(self, argument):
+        kwargs = argument.kwargs
+        url = argument.prop.url or kwargs.get("url")
+        if url:
+            return self._extract(str(url), kwargs)
+
+        raw_query = argument.prop.prepared_input
+        if raw_query is None:
+            raw_query = argument.prop.query
+
+        query = str(raw_query or "").strip() if raw_query else ""
+        if not query:
+            UserMessage(
+                "FirecrawlEngine.forward requires at least one non-empty query or url.",
+                raise_with=ValueError,
+            )
+
+        return self._search(query, kwargs)
+
+    def prepare(self, argument):
+        url = argument.kwargs.get("url") or argument.prop.url
+        if url:
+            argument.prop.prepared_input = str(url)
+            return
+
+        query = argument.prop.query
+        if isinstance(query, list):
+            argument.prop.prepared_input = " ".join(str(q) for q in query if q)
+            return
+
+        argument.prop.prepared_input = str(query or "").strip()
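The new engine registers as the "search" backend whenever SEARCH_ENGINE_MODEL is set to "firecrawl", and routes url-bearing calls to scrape and plain queries to search. A minimal usage sketch follows; the Interface entry point and the "firecrawl" key are assumptions based on the new symai/extended/interfaces/firecrawl.py listed above, while get_citations() and the url kwarg come from the code in this diff:

# Hypothetical usage sketch; the Interface wrapper is an assumption,
# not code from this diff.
from symai.interfaces import Interface

search = Interface("firecrawl")                       # backed by FirecrawlEngine
res = search("neurosymbolic programming frameworks", limit=3)
print(str(res))                                       # "title\nurl\nsnippet[1]\n\n..."
for c in res.get_citations():                         # Citation(id, title, url, start, end)
    print(c.id, c.title, c.url)

page = search(url="https://example.com/post?utm_source=x")  # utm_* params stripped, then scraped
print(str(page)[:200])                                # markdown via FirecrawlExtractResult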
symai/backend/engines/search/engine_parallel.py
CHANGED

@@ -66,7 +66,7 @@ class Citation:
         return hash((self.url,))
 
 
-class SearchResult(Result):
+class ParallelSearchResult(Result):
     def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
         super().__init__(value, **kwargs)
         if isinstance(value, dict) and value.get("error"):

@@ -286,7 +286,7 @@ class SearchResult(Result):
         return self._citations
 
 
-class
+class ParallelExtractResult(Result):
    """Result wrapper for Parallel Extract API responses."""
 
    def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:

@@ -485,7 +485,7 @@ class ParallelEngine(Engine):
             )
         except Exception as e:
             UserMessage(f"Failed to call Parallel Search API: {e}", raise_with=ValueError)
-        return [
+        return [ParallelSearchResult(result)], {"raw_output": result}
 
     def _task(self, queries: list[str], kwargs: dict[str, Any]):
         processor_name = self._coerce_processor(kwargs.get("processor"))

@@ -521,7 +521,7 @@ class ParallelEngine(Engine):
         result = self._fetch_task_result(run.run_id, timeout=timeout, api_timeout=api_timeout)
 
         payload = self._task_result_to_search_payload(result)
-        return [
+        return [ParallelSearchResult(payload)], {
             "raw_output": result,
             "task_output": payload.get("task_output"),
             "task_output_type": payload.get("task_output_type"),

@@ -699,7 +699,7 @@ class ParallelEngine(Engine):
             )
         except Exception as e:
             UserMessage(f"Failed to call Parallel Extract API: {e}", raise_with=ValueError)
-        return [
+        return [ParallelExtractResult(result)], {"raw_output": result, "final_url": url}
 
     def forward(self, argument):
         kwargs = argument.kwargs
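The renames disambiguate the result wrappers now that several search engines ship result classes side by side; the return shape itself is unchanged. A sketch of the contract after the rename (class names from the hunks above; the engine construction is omitted):

# Shape of the tuple returned by ParallelEngine._search after the rename
# (per the hunk at line 485 above); `engine` is an already-constructed ParallelEngine.
results, meta = engine._search("query", {})
assert isinstance(results[0], ParallelSearchResult)   # formerly the generic SearchResult
assert "raw_output" in meta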
symai/backend/mixin/__init__.py
CHANGED

@@ -11,6 +11,8 @@ from .groq import SUPPORTED_REASONING_MODELS as GROQ_REASONING_MODELS
 from .openai import SUPPORTED_CHAT_MODELS as OPENAI_CHAT_MODELS
 from .openai import SUPPORTED_REASONING_MODELS as OPENAI_REASONING_MODELS
 from .openai import SUPPORTED_RESPONSES_MODELS as OPENAI_RESPONSES_MODELS
+from .openrouter import SUPPORTED_CHAT_MODELS as OPENROUTER_CHAT_MODELS
+from .openrouter import SUPPORTED_REASONING_MODELS as OPENROUTER_REASONING_MODELS
 
 __all__ = [
     "ANTHROPIC_CHAT_MODELS",

@@ -26,4 +28,6 @@ __all__ = [
     "OPENAI_CHAT_MODELS",
     "OPENAI_REASONING_MODELS",
     "OPENAI_RESPONSES_MODELS",
+    "OPENROUTER_CHAT_MODELS",
+    "OPENROUTER_REASONING_MODELS",
 ]
symai/components.py
CHANGED

@@ -1229,6 +1229,7 @@ class MetadataTracker(Expression):
             and frame.f_code.co_name == "forward"
             and "self" in frame.f_locals
             and isinstance(frame.f_locals["self"], Engine)
+            and arg is not None  # Ensure arg is not None to avoid unpacking error on exceptions
         ):
             _, metadata = arg  # arg contains return value on 'return' event
             engine_name = frame.f_locals["self"].__class__.__name__

@@ -1350,6 +1351,116 @@ class MetadataTracker(Expression):
                 token_details[(engine_name, model_name)]["completion_breakdown"][
                     "reasoning_tokens"
                 ] += 0
+            elif engine_name in ("ClaudeXChatEngine", "ClaudeXReasoningEngine"):
+                raw_output = metadata["raw_output"]
+                usage = self._extract_claude_usage(raw_output)
+                if usage is None:
+                    # Skip if we can't extract usage (shouldn't happen normally)
+                    logger.warning(f"Could not extract usage from {engine_name} response.")
+                    token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
+                    token_details[(engine_name, model_name)]["prompt_breakdown"][
+                        "cached_tokens"
+                    ] += 0
+                    token_details[(engine_name, model_name)]["completion_breakdown"][
+                        "reasoning_tokens"
+                    ] += 0
+                    continue
+                input_tokens = getattr(usage, "input_tokens", 0) or 0
+                output_tokens = getattr(usage, "output_tokens", 0) or 0
+                token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += (
+                    input_tokens
+                )
+                token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += (
+                    output_tokens
+                )
+                # Calculate total tokens
+                total = input_tokens + output_tokens
+                token_details[(engine_name, model_name)]["usage"]["total_tokens"] += total
+                token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
+                # Track cache tokens if available
+                cache_creation = getattr(usage, "cache_creation_input_tokens", 0) or 0
+                cache_read = getattr(usage, "cache_read_input_tokens", 0) or 0
+                token_details[(engine_name, model_name)]["prompt_breakdown"][
+                    "cache_creation_tokens"
+                ] += cache_creation
+                token_details[(engine_name, model_name)]["prompt_breakdown"][
+                    "cache_read_tokens"
+                ] += cache_read
+                # For backward compatibility, also track as cached_tokens
+                token_details[(engine_name, model_name)]["prompt_breakdown"][
+                    "cached_tokens"
+                ] += cache_read
+                # Track reasoning/thinking tokens for ClaudeXReasoningEngine
+                if engine_name == "ClaudeXReasoningEngine":
+                    thinking_output = metadata.get("thinking", "")
+                    # Store thinking content if available
+                    if thinking_output:
+                        if "thinking_content" not in token_details[(engine_name, model_name)]:
+                            token_details[(engine_name, model_name)]["thinking_content"] = []
+                        token_details[(engine_name, model_name)]["thinking_content"].append(
+                            thinking_output
+                        )
+                # Note: Anthropic doesn't break down reasoning tokens separately in usage,
+                # but extended thinking is included in output_tokens
+                token_details[(engine_name, model_name)]["completion_breakdown"][
+                    "reasoning_tokens"
+                ] += 0
+            elif engine_name == "GeminiXReasoningEngine":
+                usage = metadata["raw_output"].usage_metadata
+                token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += (
+                    usage.prompt_token_count
+                )
+                token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += (
+                    usage.candidates_token_count
+                )
+                token_details[(engine_name, model_name)]["usage"]["total_tokens"] += (
+                    usage.total_token_count
+                )
+                token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
+                # Track cache tokens if available
+                cache_read = getattr(usage, "cached_content_token_count", 0) or 0
+                token_details[(engine_name, model_name)]["prompt_breakdown"][
+                    "cached_tokens"
+                ] += cache_read
+                # Track thinking content if available
+                thinking_output = metadata.get("thinking", "")
+                if thinking_output:
+                    if "thinking_content" not in token_details[(engine_name, model_name)]:
+                        token_details[(engine_name, model_name)]["thinking_content"] = []
+                    token_details[(engine_name, model_name)]["thinking_content"].append(
+                        thinking_output
+                    )
+                # Note: Gemini reasoning tokens are part of candidates_token_count
+                token_details[(engine_name, model_name)]["completion_breakdown"][
+                    "reasoning_tokens"
+                ] += 0
+            elif engine_name == "DeepSeekXReasoningEngine":
+                usage = metadata["raw_output"].usage
+                token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += (
+                    usage.completion_tokens
+                )
+                token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += (
+                    usage.prompt_tokens
+                )
+                token_details[(engine_name, model_name)]["usage"]["total_tokens"] += (
+                    usage.total_tokens
+                )
+                token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
+                # Track thinking content if available
+                thinking_output = metadata.get("thinking", "")
+                if thinking_output:
+                    if "thinking_content" not in token_details[(engine_name, model_name)]:
+                        token_details[(engine_name, model_name)]["thinking_content"] = []
+                    token_details[(engine_name, model_name)]["thinking_content"].append(
+                        thinking_output
+                    )
+                # Note: DeepSeek reasoning tokens might be in completion_tokens_details
+                reasoning_tokens = 0
+                if hasattr(usage, "completion_tokens_details") and usage.completion_tokens_details:
+                    reasoning_tokens = getattr(usage.completion_tokens_details, "reasoning_tokens", 0) or 0
+                token_details[(engine_name, model_name)]["completion_breakdown"][
+                    "reasoning_tokens"
+                ] += reasoning_tokens
             else:
                 logger.warning(f"Tracking {engine_name} is not supported.")
                 continue
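These branches extend MetadataTracker's usage accounting to the Claude, Gemini, and DeepSeek reasoning engines, all keyed by (engine_name, model_name). A hypothetical read of the aggregated dict; the context-manager usage and the `usage` property name are assumptions carried over from earlier symbolicai releases, while the key names follow the writes in the hunk above:

# Hypothetical; assumes MetadataTracker is usable as a context manager and
# exposes the aggregated dict via a `usage` property (as in prior releases).
from symai import Symbol
from symai.components import MetadataTracker

with MetadataTracker() as tracker:
    Symbol("hello").query("Translate to German")

for (engine, model), details in tracker.usage.items():
    u = details["usage"]
    print(engine, model, u["prompt_tokens"], u["completion_tokens"], u["total_calls"])
    print("thinking traces:", len(details.get("thinking_content", [])))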
@@ -1361,8 +1472,60 @@ class MetadataTracker(Expression):
         # Convert to normal dict
         return {**token_details}
 
+    def _extract_claude_usage(self, raw_output):
+        """Extract usage information from Claude response (handles both streaming and non-streaming).
+
+        For non-streaming responses, raw_output is a Message object with a .usage attribute.
+        For streaming responses, raw_output is a list of stream events. Usage info is in:
+        - RawMessageStartEvent.message.usage (input_tokens)
+        - RawMessageDeltaEvent.usage (output_tokens)
+        """
+        # Non-streaming: raw_output is a Message with .usage
+        if hasattr(raw_output, "usage"):
+            return raw_output.usage
+
+        # Streaming: raw_output is a list of events
+        if isinstance(raw_output, list):
+            # Accumulate usage from stream events
+            input_tokens = 0
+            output_tokens = 0
+            cache_creation = 0
+            cache_read = 0
+
+            for event in raw_output:
+                # RawMessageStartEvent contains initial usage with input_tokens
+                if hasattr(event, "message") and hasattr(event.message, "usage"):
+                    msg_usage = event.message.usage
+                    input_tokens += getattr(msg_usage, "input_tokens", 0) or 0
+                    cache_creation += getattr(msg_usage, "cache_creation_input_tokens", 0) or 0
+                    cache_read += getattr(msg_usage, "cache_read_input_tokens", 0) or 0
+                # RawMessageDeltaEvent contains usage with output_tokens
+                elif hasattr(event, "usage") and event.usage is not None:
+                    evt_usage = event.usage
+                    output_tokens += getattr(evt_usage, "output_tokens", 0) or 0
+
+            # Create a simple object-like dict to hold usage (using Box for attribute access)
+            return Box({
+                "input_tokens": input_tokens,
+                "output_tokens": output_tokens,
+                "cache_creation_input_tokens": cache_creation,
+                "cache_read_input_tokens": cache_read,
+            })
+
+        return None
+
     def _can_accumulate_engine(self, engine_name: str) -> bool:
-        supported_engines = (
+        supported_engines = (
+            "GPTXChatEngine",
+            "GPTXReasoningEngine",
+            "GPTXSearchEngine",
+            "ClaudeXChatEngine",
+            "ClaudeXReasoningEngine",
+            "GeminiXReasoningEngine",
+            "DeepSeekXReasoningEngine",
+            "GroqEngine",
+            "CerebrasEngine",
+        )
         return engine_name in supported_engines
 
     def _track_parallel_usage_items(self, token_details, engine_name, metadata):
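The streaming branch of _extract_claude_usage sums token counts across events rather than trusting any single event: message_start carries the input and cache counts, message_delta events carry output tokens. A standalone illustration with stand-in objects (SimpleNamespace replaces real anthropic stream events here; this is not library code):

# Stand-in events, illustrative only; real inputs are anthropic stream events.
from types import SimpleNamespace as NS
from symai.components import MetadataTracker

tracker = MetadataTracker()  # assumes a no-argument constructor
events = [
    # message_start: input + cache token counts live on event.message.usage
    NS(message=NS(usage=NS(input_tokens=120,
                           cache_creation_input_tokens=0,
                           cache_read_input_tokens=80))),
    # message_delta events: output tokens live on event.usage
    NS(usage=NS(output_tokens=42)),
    NS(usage=NS(output_tokens=13)),
]

usage = tracker._extract_claude_usage(events)
assert (usage.input_tokens, usage.output_tokens) == (120, 55)
assert usage.cache_read_input_tokens == 80   # returned as a Box for attribute access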
@@ -1388,21 +1551,48 @@ class MetadataTracker(Expression):
 
         metadata_raw_output = metadata["raw_output"]
         accumulated_raw_output = accumulated["raw_output"]
-        if not hasattr(metadata_raw_output, "usage") or not hasattr(
-            accumulated_raw_output, "usage"
-        ):
-            return
 
-
-
+        # Handle both OpenAI/Anthropic-style (usage) and Gemini-style (usage_metadata)
+        current_usage = getattr(metadata_raw_output, "usage", None) or getattr(
+            metadata_raw_output, "usage_metadata", None
+        )
+        accumulated_usage = getattr(accumulated_raw_output, "usage", None) or getattr(
+            accumulated_raw_output, "usage_metadata", None
+        )
+
+        if not current_usage or not accumulated_usage:
+            return
 
-
+        # Handle both OpenAI-style (completion_tokens, prompt_tokens),
+        # Anthropic-style (output_tokens, input_tokens),
+        # and Gemini-style (candidates_token_count, prompt_token_count) fields
+        token_attrs = [
+            "completion_tokens",
+            "prompt_tokens",
+            "total_tokens",
+            "input_tokens",
+            "output_tokens",
+            "candidates_token_count",
+            "prompt_token_count",
+            "total_token_count",
+        ]
+        for attr in token_attrs:
             if hasattr(current_usage, attr) and hasattr(accumulated_usage, attr):
-
-
-
-
-
+                current_val = getattr(current_usage, attr) or 0
+                accumulated_val = getattr(accumulated_usage, attr) or 0
+                setattr(accumulated_usage, attr, accumulated_val + current_val)
+
+        # Handle Anthropic cache tokens and Gemini cached tokens
+        cache_attrs = [
+            "cache_creation_input_tokens",
+            "cache_read_input_tokens",
+            "cached_content_token_count",
+        ]
+        for attr in cache_attrs:
+            if hasattr(current_usage, attr) and hasattr(accumulated_usage, attr):
+                current_val = getattr(current_usage, attr) or 0
+                accumulated_val = getattr(accumulated_usage, attr) or 0
+                setattr(accumulated_usage, attr, accumulated_val + current_val)
 
         for detail_attr in ["completion_tokens_details", "prompt_tokens_details"]:
             if not hasattr(current_usage, detail_attr) or not hasattr(
@@ -1508,12 +1698,18 @@ class DynamicEngine(Expression):
         """Create an engine instance based on the model name."""
         # Deferred to avoid components <-> neurosymbolic engine circular imports.
         from .backend.engines.neurosymbolic import ENGINE_MAPPING  # noqa
-        from .backend.engines.
+        from .backend.engines.search import SEARCH_ENGINE_MAPPING  # noqa
 
         try:
+            # Check neurosymbolic engines first
             engine_class = ENGINE_MAPPING.get(self.model)
-
-
+
+            # Check search engines
+            if engine_class is None:
+                engine_class = SEARCH_ENGINE_MAPPING.get(self.model)
+                if engine_class is not None:
+                    return engine_class(api_key=self.api_key)
+
             if engine_class is None:
                 UserMessage(f"Unsupported model '{self.model}'", raise_with=ValueError)
             return engine_class(api_key=self.api_key, model=self.model)
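With the added lookup, DynamicEngine resolves a model name against the neurosymbolic table first and falls back to the search table; note that the search path constructs the engine with api_key only, while neurosymbolic engines also receive model. A minimal standalone mirror of that resolution order (the mapping contents below are placeholders, not the library's real tables):

# Placeholder tables; the real ENGINE_MAPPING / SEARCH_ENGINE_MAPPING keys
# live inside symai and are not shown in this diff.
NEURO_MAPPING = {"gpt-4o": "GPTXChatEngine"}
SEARCH_MAPPING = {"firecrawl": "FirecrawlEngine", "parallel": "ParallelEngine"}

def resolve(model: str) -> str:
    engine_class = NEURO_MAPPING.get(model)       # neurosymbolic engines first
    if engine_class is None:
        engine_class = SEARCH_MAPPING.get(model)  # then search engines
    if engine_class is None:
        raise ValueError(f"Unsupported model '{model}'")
    return engine_class

assert resolve("firecrawl") == "FirecrawlEngine"
assert resolve("gpt-4o") == "GPTXChatEngine"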
|