symbolicai 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. symai/__init__.py +21 -71
  2. symai/backend/base.py +0 -26
  3. symai/backend/engines/drawing/engine_gemini_image.py +101 -0
  4. symai/backend/engines/embedding/engine_openai.py +11 -8
  5. symai/backend/engines/neurosymbolic/__init__.py +8 -0
  6. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +14 -1
  7. symai/backend/engines/neurosymbolic/engine_openrouter.py +294 -0
  8. symai/backend/engines/scrape/engine_requests.py +39 -10
  9. symai/backend/engines/search/__init__.py +13 -0
  10. symai/backend/engines/search/engine_firecrawl.py +333 -0
  11. symai/backend/engines/search/engine_parallel.py +5 -5
  12. symai/backend/mixin/__init__.py +4 -0
  13. symai/backend/mixin/openrouter.py +2 -0
  14. symai/components.py +212 -16
  15. symai/extended/interfaces/firecrawl.py +30 -0
  16. symai/extended/interfaces/nanobanana.py +23 -0
  17. symai/extended/interfaces/parallel.py +5 -5
  18. symai/functional.py +3 -4
  19. symai/interfaces.py +2 -0
  20. symai/ops/primitives.py +0 -18
  21. symai/shellsv.py +2 -7
  22. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/METADATA +3 -9
  23. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/RECORD +27 -47
  24. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/WHEEL +1 -1
  25. symai/backend/driver/webclient.py +0 -217
  26. symai/backend/engines/crawler/engine_selenium.py +0 -94
  27. symai/backend/engines/drawing/engine_dall_e.py +0 -131
  28. symai/backend/engines/embedding/engine_plugin_embeddings.py +0 -12
  29. symai/backend/engines/experiments/engine_bard_wrapper.py +0 -131
  30. symai/backend/engines/experiments/engine_gptfinetuner.py +0 -32
  31. symai/backend/engines/experiments/engine_llamacpp_completion.py +0 -142
  32. symai/backend/engines/neurosymbolic/engine_openai_gptX_completion.py +0 -277
  33. symai/collect/__init__.py +0 -8
  34. symai/collect/dynamic.py +0 -117
  35. symai/collect/pipeline.py +0 -156
  36. symai/collect/stats.py +0 -434
  37. symai/extended/crawler.py +0 -21
  38. symai/extended/interfaces/selenium.py +0 -18
  39. symai/extended/interfaces/vectordb.py +0 -21
  40. symai/extended/personas/__init__.py +0 -3
  41. symai/extended/personas/builder.py +0 -105
  42. symai/extended/personas/dialogue.py +0 -126
  43. symai/extended/personas/persona.py +0 -154
  44. symai/extended/personas/research/__init__.py +0 -1
  45. symai/extended/personas/research/yann_lecun.py +0 -62
  46. symai/extended/personas/sales/__init__.py +0 -1
  47. symai/extended/personas/sales/erik_james.py +0 -62
  48. symai/extended/personas/student/__init__.py +0 -1
  49. symai/extended/personas/student/max_tenner.py +0 -51
  50. symai/extended/strategies/__init__.py +0 -1
  51. symai/extended/strategies/cot.py +0 -40
  52. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/entry_points.txt +0 -0
  53. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/licenses/LICENSE +0 -0
  54. {symbolicai-1.4.0.dist-info → symbolicai-1.6.0.dist-info}/top_level.txt +0 -0
symai/backend/engines/search/engine_firecrawl.py ADDED
@@ -0,0 +1,333 @@
+import json
+import logging
+from copy import deepcopy
+from dataclasses import dataclass
+from typing import Any
+from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
+
+from firecrawl import Firecrawl
+from firecrawl.v2.types import ScrapeOptions
+
+from ....symbol import Result
+from ....utils import UserMessage
+from ...base import Engine
+from ...settings import SYMAI_CONFIG
+
+logging.getLogger("requests").setLevel(logging.ERROR)
+logging.getLogger("urllib3").setLevel(logging.ERROR)
+logging.getLogger("httpx").setLevel(logging.ERROR)
+
+TRACKING_KEYS = {
+    "utm_source",
+    "utm_medium",
+    "utm_campaign",
+    "utm_term",
+    "utm_content",
+}
+
+
+@dataclass
+class Citation:
+    id: int
+    title: str
+    url: str
+    start: int
+    end: int
+
+    def __hash__(self):
+        return hash((self.url,))
+
+
+class FirecrawlSearchResult(Result):
+    def __init__(
+        self, value: dict[str, Any] | Any, max_chars_per_result: int | None = None, **kwargs
+    ) -> None:
+        raw_dict = value.model_dump() if hasattr(value, "model_dump") else value
+        super().__init__(raw_dict, **kwargs)
+        self._citations: list[Citation] = []
+        self._max_chars_per_result = max_chars_per_result
+        try:
+            text, citations = self._build_text_and_citations(raw_dict)
+            self._value = text
+            self._citations = citations
+        except Exception as e:
+            self._value = None
+            UserMessage(f"Failed to parse Firecrawl search response: {e}", raise_with=ValueError)
+
+    def _build_text_and_citations(self, data: dict[str, Any]) -> tuple[str, list[Citation]]:
+        results = []
+        for source in ["web", "news", "images"]:
+            source_data = data.get(source) or []
+            results.extend(source_data)
+
+        if not results:
+            return "", []
+
+        parts = []
+        citations = []
+        cursor = 0
+
+        for idx, item in enumerate(results, 1):
+            # Handle both SearchResultWeb (url/title at top level) and Document (url/title in metadata)
+            metadata = item.get("metadata") or {}
+            url = item.get("url") or metadata.get("url") or metadata.get("source_url") or ""
+            title = item.get("title") or metadata.get("title") or ""
+
+            if not url:
+                continue
+
+            # Check if this is a scraped result (has markdown content)
+            markdown = item.get("markdown", "")
+            if markdown:
+                content = markdown
+                if self._max_chars_per_result and len(content) > self._max_chars_per_result:
+                    content = content[: self._max_chars_per_result] + "..."
+                result_text = f"{title}\n{url}\n{content}"
+            else:
+                description = (
+                    item.get("description")
+                    or item.get("snippet")
+                    or metadata.get("description")
+                    or ""
+                )
+                result_text = f"{title}\n{url}"
+                if description:
+                    if self._max_chars_per_result and len(description) > self._max_chars_per_result:
+                        description = description[: self._max_chars_per_result] + "..."
+                    result_text += f"\n{description}"
+
+            if parts:
+                parts.append("\n\n")
+                cursor += 2
+
+            parts.append(result_text)
+            cursor += len(result_text)
+
+            marker = f"[{idx}]"
+            start = cursor
+            parts.append(marker)
+            cursor += len(marker)
+
+            citations.append(Citation(id=idx, title=title, url=url, start=start, end=cursor))
+
+        text = "".join(parts)
+        return text, citations
+
+    def __str__(self) -> str:
+        if isinstance(self._value, str) and self._value:
+            return self._value
+        try:
+            return json.dumps(self.raw, indent=2)
+        except TypeError:
+            return str(self.raw)
+
+    def _repr_html_(self) -> str:
+        if isinstance(self._value, str) and self._value:
+            return f"<pre>{self._value}</pre>"
+        try:
+            return f"<pre>{json.dumps(self.raw, indent=2)}</pre>"
+        except Exception:
+            return f"<pre>{self.raw!s}</pre>"
+
+    def get_citations(self) -> list[Citation]:
+        return self._citations
+
+
+class FirecrawlExtractResult(Result):
+    """Result wrapper for Firecrawl scrape API responses."""
+
+    def __init__(self, value: Any, **kwargs) -> None:
+        raw_dict = value.model_dump() if hasattr(value, "model_dump") else value
+        super().__init__(raw_dict, **kwargs)
+        try:
+            self._value = self._extract_content(raw_dict)
+        except Exception as e:
+            self._value = None
+            UserMessage(f"Failed to parse Firecrawl scrape response: {e}", raise_with=ValueError)
+
+    def _extract_content(self, data: dict[str, Any]) -> str:
+        content = data.get("markdown") or data.get("html") or data.get("raw_html")
+        if content:
+            return str(content)
+        json_data = data.get("json")
+        if json_data:
+            return json.dumps(json_data, indent=2)
+        return ""
+
+    def __str__(self) -> str:
+        try:
+            return str(self._value or "")
+        except Exception:
+            return ""
+
+    def _repr_html_(self) -> str:
+        try:
+            return f"<pre>{self._value or ''}</pre>"
+        except Exception:
+            return "<pre></pre>"
+
+
+class FirecrawlEngine(Engine):
+    def __init__(self, api_key: str | None = None):
+        super().__init__()
+        self.config = deepcopy(SYMAI_CONFIG)
+        self.api_key = api_key or self.config.get("SEARCH_ENGINE_API_KEY")
+        self.model = self.config.get("SEARCH_ENGINE_MODEL")
+        self.name = self.__class__.__name__
+
+        if not self.api_key:
+            UserMessage(
+                "Firecrawl API key not found. Set SEARCH_ENGINE_API_KEY in config or environment.",
+                raise_with=ValueError,
+            )
+
+        try:
+            self.client = Firecrawl(api_key=self.api_key)
+        except Exception as e:
+            UserMessage(f"Failed to initialize Firecrawl client: {e}", raise_with=ValueError)
+
+    def id(self) -> str:
+        if (
+            self.config.get("SEARCH_ENGINE_API_KEY")
+            and str(self.config.get("SEARCH_ENGINE_MODEL", "")).lower() == "firecrawl"
+        ):
+            return "search"
+        return super().id()
+
+    def command(self, *args, **kwargs):
+        super().command(*args, **kwargs)
+        if "SEARCH_ENGINE_API_KEY" in kwargs:
+            self.api_key = kwargs["SEARCH_ENGINE_API_KEY"]
+        if "SEARCH_ENGINE_MODEL" in kwargs:
+            self.model = kwargs["SEARCH_ENGINE_MODEL"]
+
+    def _normalize_url(self, url: str) -> str:
+        parts = urlsplit(url)
+        filtered_query = [
+            (k, v)
+            for k, v in parse_qsl(parts.query, keep_blank_values=True)
+            if k not in TRACKING_KEYS and not k.lower().startswith("utm_")
+        ]
+        query = urlencode(filtered_query, doseq=True)
+        return urlunsplit((parts.scheme, parts.netloc, parts.path, query, parts.fragment))
+
+    def _search(self, query: str, kwargs: dict[str, Any]):
+        if not query:
+            UserMessage(
+                "FirecrawlEngine._search requires a non-empty query.", raise_with=ValueError
+            )
+
+        max_chars_per_result = kwargs.get("max_chars_per_result")
+
+        # Build search kwargs
+        search_kwargs = {}
+        if "limit" in kwargs:
+            search_kwargs["limit"] = kwargs["limit"]
+        if "location" in kwargs:
+            search_kwargs["location"] = kwargs["location"]
+        if "tbs" in kwargs:
+            search_kwargs["tbs"] = kwargs["tbs"]
+        if "sources" in kwargs:
+            search_kwargs["sources"] = kwargs["sources"]
+        if "categories" in kwargs:
+            search_kwargs["categories"] = kwargs["categories"]
+        if "timeout" in kwargs:
+            search_kwargs["timeout"] = kwargs["timeout"]
+
+        # Build scrape options for search results content
+        scrape_opts = {}
+        if "formats" in kwargs:
+            scrape_opts["formats"] = kwargs["formats"]
+        if "proxy" in kwargs:
+            scrape_opts["proxy"] = kwargs["proxy"]
+        if "only_main_content" in kwargs:
+            scrape_opts["only_main_content"] = kwargs["only_main_content"]
+        if "scrape_location" in kwargs:
+            scrape_opts["location"] = kwargs["scrape_location"]
+        if "include_tags" in kwargs:
+            scrape_opts["include_tags"] = kwargs["include_tags"]
+        if "exclude_tags" in kwargs:
+            scrape_opts["exclude_tags"] = kwargs["exclude_tags"]
+
+        if scrape_opts:
+            search_kwargs["scrape_options"] = ScrapeOptions(**scrape_opts)
+
+        try:
+            result = self.client.search(query, **search_kwargs)
+        except Exception as e:
+            UserMessage(f"Failed to call Firecrawl Search API: {e}", raise_with=ValueError)
+
+        raw = result.model_dump() if hasattr(result, "model_dump") else result
+        return [FirecrawlSearchResult(result, max_chars_per_result=max_chars_per_result)], {
+            "raw_output": raw
+        }
+
+    def _extract(self, url: str, kwargs: dict[str, Any]):
+        normalized_url = self._normalize_url(url)
+
+        # Build scrape kwargs
+        scrape_kwargs = {"formats": kwargs.get("formats", ["markdown"])}
+        if "only_main_content" in kwargs:
+            scrape_kwargs["only_main_content"] = kwargs["only_main_content"]
+        if "timeout" in kwargs:
+            scrape_kwargs["timeout"] = kwargs["timeout"]
+        if "proxy" in kwargs:
+            scrape_kwargs["proxy"] = kwargs["proxy"]
+        if "location" in kwargs:
+            scrape_kwargs["location"] = kwargs["location"]
+        if "max_age" in kwargs:
+            scrape_kwargs["max_age"] = kwargs["max_age"]
+        if "store_in_cache" in kwargs:
+            scrape_kwargs["store_in_cache"] = kwargs["store_in_cache"]
+        if "actions" in kwargs:
+            scrape_kwargs["actions"] = kwargs["actions"]
+        if "headers" in kwargs:
+            scrape_kwargs["headers"] = kwargs["headers"]
+        if "include_tags" in kwargs:
+            scrape_kwargs["include_tags"] = kwargs["include_tags"]
+        if "exclude_tags" in kwargs:
+            scrape_kwargs["exclude_tags"] = kwargs["exclude_tags"]
+        if "wait_for" in kwargs:
+            scrape_kwargs["wait_for"] = kwargs["wait_for"]
+        if "mobile" in kwargs:
+            scrape_kwargs["mobile"] = kwargs["mobile"]
+
+        try:
+            result = self.client.scrape(normalized_url, **scrape_kwargs)
+        except Exception as e:
+            UserMessage(f"Failed to call Firecrawl Scrape API: {e}", raise_with=ValueError)
+
+        raw = result.model_dump() if hasattr(result, "model_dump") else result
+        return [FirecrawlExtractResult(result)], {"raw_output": raw, "final_url": normalized_url}
+
+    def forward(self, argument):
+        kwargs = argument.kwargs
+        url = argument.prop.url or kwargs.get("url")
+        if url:
+            return self._extract(str(url), kwargs)
+
+        raw_query = argument.prop.prepared_input
+        if raw_query is None:
+            raw_query = argument.prop.query
+
+        query = str(raw_query or "").strip() if raw_query else ""
+        if not query:
+            UserMessage(
+                "FirecrawlEngine.forward requires at least one non-empty query or url.",
+                raise_with=ValueError,
+            )
+
+        return self._search(query, kwargs)
+
+    def prepare(self, argument):
+        url = argument.kwargs.get("url") or argument.prop.url
+        if url:
+            argument.prop.prepared_input = str(url)
+            return
+
+        query = argument.prop.query
+        if isinstance(query, list):
+            argument.prop.prepared_input = " ".join(str(q) for q in query if q)
+            return
+
+        argument.prop.prepared_input = str(query or "").strip()
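Taken together, FirecrawlEngine maps a plain query to Firecrawl's search endpoint and a `url` argument to its scrape endpoint, stripping `utm_*` tracking parameters first. A minimal sketch that drives the engine's internal methods directly; the API key, query, and URL below are placeholders, and calling `_search`/`_extract` bypasses the usual symai dispatch:

    from symai.backend.engines.search.engine_firecrawl import FirecrawlEngine

    engine = FirecrawlEngine(api_key="fc-...")  # placeholder key

    # Search path: returns ([FirecrawlSearchResult], metadata).
    results, meta = engine._search("neurosymbolic programming",
                                   {"limit": 3, "max_chars_per_result": 400})
    print(results[0])                     # flattened text with [1], [2], ... markers
    for c in results[0].get_citations():  # Citation(id, title, url, start, end)
        print(c.id, c.url)

    # Extract path: utm_* parameters are removed by _normalize_url before scraping.
    docs, info = engine._extract("https://example.com/post?id=7&utm_source=x",
                                 {"formats": ["markdown"]})
    print(info["final_url"])              # https://example.com/post?id=7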
symai/backend/engines/search/engine_parallel.py CHANGED
@@ -66,7 +66,7 @@ class Citation:
         return hash((self.url,))
 
 
-class SearchResult(Result):
+class ParallelSearchResult(Result):
     def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
         super().__init__(value, **kwargs)
         if isinstance(value, dict) and value.get("error"):
@@ -286,7 +286,7 @@ class SearchResult(Result):
         return self._citations
 
 
-class ExtractResult(Result):
+class ParallelExtractResult(Result):
     """Result wrapper for Parallel Extract API responses."""
 
     def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
@@ -485,7 +485,7 @@ class ParallelEngine(Engine):
             )
         except Exception as e:
             UserMessage(f"Failed to call Parallel Search API: {e}", raise_with=ValueError)
-        return [SearchResult(result)], {"raw_output": result}
+        return [ParallelSearchResult(result)], {"raw_output": result}
 
     def _task(self, queries: list[str], kwargs: dict[str, Any]):
         processor_name = self._coerce_processor(kwargs.get("processor"))
@@ -521,7 +521,7 @@ class ParallelEngine(Engine):
        result = self._fetch_task_result(run.run_id, timeout=timeout, api_timeout=api_timeout)
 
        payload = self._task_result_to_search_payload(result)
-       return [SearchResult(payload)], {
+       return [ParallelSearchResult(payload)], {
            "raw_output": result,
            "task_output": payload.get("task_output"),
            "task_output_type": payload.get("task_output_type"),
@@ -699,7 +699,7 @@ class ParallelEngine(Engine):
             )
         except Exception as e:
             UserMessage(f"Failed to call Parallel Extract API: {e}", raise_with=ValueError)
-        return [ExtractResult(result)], {"raw_output": result, "final_url": url}
+        return [ParallelExtractResult(result)], {"raw_output": result, "final_url": url}
 
     def forward(self, argument):
         kwargs = argument.kwargs
symai/backend/mixin/__init__.py CHANGED
@@ -11,6 +11,8 @@ from .groq import SUPPORTED_REASONING_MODELS as GROQ_REASONING_MODELS
 from .openai import SUPPORTED_CHAT_MODELS as OPENAI_CHAT_MODELS
 from .openai import SUPPORTED_REASONING_MODELS as OPENAI_REASONING_MODELS
 from .openai import SUPPORTED_RESPONSES_MODELS as OPENAI_RESPONSES_MODELS
+from .openrouter import SUPPORTED_CHAT_MODELS as OPENROUTER_CHAT_MODELS
+from .openrouter import SUPPORTED_REASONING_MODELS as OPENROUTER_REASONING_MODELS
 
 __all__ = [
     "ANTHROPIC_CHAT_MODELS",
@@ -26,4 +28,6 @@ __all__ = [
     "OPENAI_CHAT_MODELS",
     "OPENAI_REASONING_MODELS",
     "OPENAI_RESPONSES_MODELS",
+    "OPENROUTER_CHAT_MODELS",
+    "OPENROUTER_REASONING_MODELS",
 ]
symai/backend/mixin/openrouter.py ADDED
@@ -0,0 +1,2 @@
+SUPPORTED_CHAT_MODELS = ["openrouter:moonshotai/kimi-k2.5"]
+SUPPORTED_REASONING_MODELS = []
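The mixin only declares model identifiers; the `openrouter:` prefix mirrors the `cerebras:` prefix that DynamicEngine previously special-cased. A small illustration of the naming convention (the split logic is illustrative, not symai code):

    from symai.backend.mixin import OPENROUTER_CHAT_MODELS

    model = OPENROUTER_CHAT_MODELS[0]            # "openrouter:moonshotai/kimi-k2.5"
    provider, _, upstream_id = model.partition(":")
    assert provider == "openrouter"
    assert upstream_id == "moonshotai/kimi-k2.5"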
symai/components.py CHANGED
@@ -1229,6 +1229,7 @@ class MetadataTracker(Expression):
                 and frame.f_code.co_name == "forward"
                 and "self" in frame.f_locals
                 and isinstance(frame.f_locals["self"], Engine)
+                and arg is not None  # Ensure arg is not None to avoid unpacking error on exceptions
             ):
                 _, metadata = arg  # arg contains return value on 'return' event
                 engine_name = frame.f_locals["self"].__class__.__name__
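Background for the added `arg is not None` guard: under `sys.settrace`, a 'return' event also fires when a frame is unwound by an exception, and in that case `arg` is None, so the tuple unpacking on the next line would raise. A minimal standalone reproduction (plain Python, not symai code):

    import sys

    def tracer(frame, event, arg):
        if event == "return" and frame.f_code.co_name == "forward":
            print("return arg:", arg)  # prints None because forward() raises
        return tracer

    def forward():
        raise RuntimeError("boom")

    sys.settrace(tracer)
    try:
        forward()
    except RuntimeError:
        pass
    finally:
        sys.settrace(None)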
@@ -1350,6 +1351,116 @@ class MetadataTracker(Expression):
                 token_details[(engine_name, model_name)]["completion_breakdown"][
                     "reasoning_tokens"
                 ] += 0
+            elif engine_name in ("ClaudeXChatEngine", "ClaudeXReasoningEngine"):
+                raw_output = metadata["raw_output"]
+                usage = self._extract_claude_usage(raw_output)
+                if usage is None:
+                    # Skip if we can't extract usage (shouldn't happen normally)
+                    logger.warning(f"Could not extract usage from {engine_name} response.")
+                    token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
+                    token_details[(engine_name, model_name)]["prompt_breakdown"][
+                        "cached_tokens"
+                    ] += 0
+                    token_details[(engine_name, model_name)]["completion_breakdown"][
+                        "reasoning_tokens"
+                    ] += 0
+                    continue
+                input_tokens = getattr(usage, "input_tokens", 0) or 0
+                output_tokens = getattr(usage, "output_tokens", 0) or 0
+                token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += (
+                    input_tokens
+                )
+                token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += (
+                    output_tokens
+                )
+                # Calculate total tokens
+                total = input_tokens + output_tokens
+                token_details[(engine_name, model_name)]["usage"]["total_tokens"] += total
+                token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
+                # Track cache tokens if available
+                cache_creation = getattr(usage, "cache_creation_input_tokens", 0) or 0
+                cache_read = getattr(usage, "cache_read_input_tokens", 0) or 0
+                token_details[(engine_name, model_name)]["prompt_breakdown"][
+                    "cache_creation_tokens"
+                ] += cache_creation
+                token_details[(engine_name, model_name)]["prompt_breakdown"][
+                    "cache_read_tokens"
+                ] += cache_read
+                # For backward compatibility, also track as cached_tokens
+                token_details[(engine_name, model_name)]["prompt_breakdown"][
+                    "cached_tokens"
+                ] += cache_read
+                # Track reasoning/thinking tokens for ClaudeXReasoningEngine
+                if engine_name == "ClaudeXReasoningEngine":
+                    thinking_output = metadata.get("thinking", "")
+                    # Store thinking content if available
+                    if thinking_output:
+                        if "thinking_content" not in token_details[(engine_name, model_name)]:
+                            token_details[(engine_name, model_name)]["thinking_content"] = []
+                        token_details[(engine_name, model_name)]["thinking_content"].append(
+                            thinking_output
+                        )
+                # Note: Anthropic doesn't break down reasoning tokens separately in usage,
+                # but extended thinking is included in output_tokens
+                token_details[(engine_name, model_name)]["completion_breakdown"][
+                    "reasoning_tokens"
+                ] += 0
+            elif engine_name == "GeminiXReasoningEngine":
+                usage = metadata["raw_output"].usage_metadata
+                token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += (
+                    usage.prompt_token_count
+                )
+                token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += (
+                    usage.candidates_token_count
+                )
+                token_details[(engine_name, model_name)]["usage"]["total_tokens"] += (
+                    usage.total_token_count
+                )
+                token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
+                # Track cache tokens if available
+                cache_read = getattr(usage, "cached_content_token_count", 0) or 0
+                token_details[(engine_name, model_name)]["prompt_breakdown"][
+                    "cached_tokens"
+                ] += cache_read
+                # Track thinking content if available
+                thinking_output = metadata.get("thinking", "")
+                if thinking_output:
+                    if "thinking_content" not in token_details[(engine_name, model_name)]:
+                        token_details[(engine_name, model_name)]["thinking_content"] = []
+                    token_details[(engine_name, model_name)]["thinking_content"].append(
+                        thinking_output
+                    )
+                # Note: Gemini reasoning tokens are part of candidates_token_count
+                token_details[(engine_name, model_name)]["completion_breakdown"][
+                    "reasoning_tokens"
+                ] += 0
+            elif engine_name == "DeepSeekXReasoningEngine":
+                usage = metadata["raw_output"].usage
+                token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += (
+                    usage.completion_tokens
+                )
+                token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += (
+                    usage.prompt_tokens
+                )
+                token_details[(engine_name, model_name)]["usage"]["total_tokens"] += (
+                    usage.total_tokens
+                )
+                token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
+                # Track thinking content if available
+                thinking_output = metadata.get("thinking", "")
+                if thinking_output:
+                    if "thinking_content" not in token_details[(engine_name, model_name)]:
+                        token_details[(engine_name, model_name)]["thinking_content"] = []
+                    token_details[(engine_name, model_name)]["thinking_content"].append(
+                        thinking_output
+                    )
+                # Note: DeepSeek reasoning tokens might be in completion_tokens_details
+                reasoning_tokens = 0
+                if hasattr(usage, "completion_tokens_details") and usage.completion_tokens_details:
+                    reasoning_tokens = getattr(usage.completion_tokens_details, "reasoning_tokens", 0) or 0
+                token_details[(engine_name, model_name)]["completion_breakdown"][
+                    "reasoning_tokens"
+                ] += reasoning_tokens
             else:
                 logger.warning(f"Tracking {engine_name} is not supported.")
                 continue
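All branches above write into the same per-`(engine_name, model_name)` accumulator, so consumers see one shape regardless of provider. Approximately (keys inferred from this diff; zero values are the defaults):

    entry = {
        "usage": {"prompt_tokens": 0, "completion_tokens": 0,
                  "total_tokens": 0, "total_calls": 0},
        "prompt_breakdown": {"cached_tokens": 0,
                             "cache_creation_tokens": 0,  # Claude engines only
                             "cache_read_tokens": 0},     # Claude engines only
        "completion_breakdown": {"reasoning_tokens": 0},
        # "thinking_content": [...]  # list of strings, added lazily by reasoning engines
    }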
@@ -1361,8 +1472,60 @@ class MetadataTracker(Expression):
         # Convert to normal dict
         return {**token_details}
 
+    def _extract_claude_usage(self, raw_output):
+        """Extract usage information from Claude response (handles both streaming and non-streaming).
+
+        For non-streaming responses, raw_output is a Message object with a .usage attribute.
+        For streaming responses, raw_output is a list of stream events. Usage info is in:
+        - RawMessageStartEvent.message.usage (input_tokens)
+        - RawMessageDeltaEvent.usage (output_tokens)
+        """
+        # Non-streaming: raw_output is a Message with .usage
+        if hasattr(raw_output, "usage"):
+            return raw_output.usage
+
+        # Streaming: raw_output is a list of events
+        if isinstance(raw_output, list):
+            # Accumulate usage from stream events
+            input_tokens = 0
+            output_tokens = 0
+            cache_creation = 0
+            cache_read = 0
+
+            for event in raw_output:
+                # RawMessageStartEvent contains initial usage with input_tokens
+                if hasattr(event, "message") and hasattr(event.message, "usage"):
+                    msg_usage = event.message.usage
+                    input_tokens += getattr(msg_usage, "input_tokens", 0) or 0
+                    cache_creation += getattr(msg_usage, "cache_creation_input_tokens", 0) or 0
+                    cache_read += getattr(msg_usage, "cache_read_input_tokens", 0) or 0
+                # RawMessageDeltaEvent contains usage with output_tokens
+                elif hasattr(event, "usage") and event.usage is not None:
+                    evt_usage = event.usage
+                    output_tokens += getattr(evt_usage, "output_tokens", 0) or 0
+
+            # Create a simple object-like dict to hold usage (using Box for attribute access)
+            return Box({
+                "input_tokens": input_tokens,
+                "output_tokens": output_tokens,
+                "cache_creation_input_tokens": cache_creation,
+                "cache_read_input_tokens": cache_read,
+            })
+
+        return None
+
     def _can_accumulate_engine(self, engine_name: str) -> bool:
-        supported_engines = ("GPTXChatEngine", "GPTXReasoningEngine", "GPTXSearchEngine")
+        supported_engines = (
+            "GPTXChatEngine",
+            "GPTXReasoningEngine",
+            "GPTXSearchEngine",
+            "ClaudeXChatEngine",
+            "ClaudeXReasoningEngine",
+            "GeminiXReasoningEngine",
+            "DeepSeekXReasoningEngine",
+            "GroqEngine",
+            "CerebrasEngine",
+        )
         return engine_name in supported_engines
 
     def _track_parallel_usage_items(self, token_details, engine_name, metadata):
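A quick sanity check of the streaming branch of `_extract_claude_usage`, using `SimpleNamespace` stand-ins for Anthropic's `RawMessageStartEvent`/`RawMessageDeltaEvent` rather than real SDK types (the method never touches `self`, so it can be exercised unbound):

    from types import SimpleNamespace as NS
    from symai.components import MetadataTracker

    # Stand-ins: a start event carries input/cache tokens, a delta event carries output tokens.
    start = NS(message=NS(usage=NS(input_tokens=120,
                                   cache_creation_input_tokens=0,
                                   cache_read_input_tokens=80)))
    delta = NS(usage=NS(output_tokens=45))

    usage = MetadataTracker._extract_claude_usage(None, [start, delta])  # self is unused
    assert (usage.input_tokens, usage.output_tokens, usage.cache_read_input_tokens) == (120, 45, 80)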
@@ -1388,21 +1551,48 @@ class MetadataTracker(Expression):
 
         metadata_raw_output = metadata["raw_output"]
         accumulated_raw_output = accumulated["raw_output"]
-        if not hasattr(metadata_raw_output, "usage") or not hasattr(
-            accumulated_raw_output, "usage"
-        ):
-            return
 
-        current_usage = metadata_raw_output.usage
-        accumulated_usage = accumulated_raw_output.usage
+        # Handle both OpenAI/Anthropic-style (usage) and Gemini-style (usage_metadata)
+        current_usage = getattr(metadata_raw_output, "usage", None) or getattr(
+            metadata_raw_output, "usage_metadata", None
+        )
+        accumulated_usage = getattr(accumulated_raw_output, "usage", None) or getattr(
+            accumulated_raw_output, "usage_metadata", None
+        )
+
+        if not current_usage or not accumulated_usage:
+            return
 
-        for attr in ["completion_tokens", "prompt_tokens", "total_tokens"]:
+        # Handle both OpenAI-style (completion_tokens, prompt_tokens),
+        # Anthropic-style (output_tokens, input_tokens),
+        # and Gemini-style (candidates_token_count, prompt_token_count) fields
+        token_attrs = [
+            "completion_tokens",
+            "prompt_tokens",
+            "total_tokens",
+            "input_tokens",
+            "output_tokens",
+            "candidates_token_count",
+            "prompt_token_count",
+            "total_token_count",
+        ]
+        for attr in token_attrs:
             if hasattr(current_usage, attr) and hasattr(accumulated_usage, attr):
-                setattr(
-                    accumulated_usage,
-                    attr,
-                    getattr(accumulated_usage, attr) + getattr(current_usage, attr),
-                )
+                current_val = getattr(current_usage, attr) or 0
+                accumulated_val = getattr(accumulated_usage, attr) or 0
+                setattr(accumulated_usage, attr, accumulated_val + current_val)
+
+        # Handle Anthropic cache tokens and Gemini cached tokens
+        cache_attrs = [
+            "cache_creation_input_tokens",
+            "cache_read_input_tokens",
+            "cached_content_token_count",
+        ]
+        for attr in cache_attrs:
+            if hasattr(current_usage, attr) and hasattr(accumulated_usage, attr):
+                current_val = getattr(current_usage, attr) or 0
+                accumulated_val = getattr(accumulated_usage, attr) or 0
+                setattr(accumulated_usage, attr, accumulated_val + current_val)
 
         for detail_attr in ["completion_tokens_details", "prompt_tokens_details"]:
             if not hasattr(current_usage, detail_attr) or not hasattr(
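Because the loops above take the union of provider token fields and skip attributes absent on either side, accumulation works for OpenAI-, Anthropic-, and Gemini-shaped usage objects alike. Illustration with Gemini-style stand-ins (`SimpleNamespace`, not real SDK types):

    from types import SimpleNamespace as NS

    acc = NS(prompt_token_count=100, candidates_token_count=40, total_token_count=140)
    cur = NS(prompt_token_count=25, candidates_token_count=10, total_token_count=35)

    for attr in ["completion_tokens", "prompt_tokens", "total_tokens",
                 "prompt_token_count", "candidates_token_count", "total_token_count"]:
        if hasattr(cur, attr) and hasattr(acc, attr):
            setattr(acc, attr, (getattr(acc, attr) or 0) + (getattr(cur, attr) or 0))

    assert acc.total_token_count == 175  # OpenAI-style attrs are simply skipped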
@@ -1508,12 +1698,18 @@ class DynamicEngine(Expression):
         """Create an engine instance based on the model name."""
         # Deferred to avoid components <-> neurosymbolic engine circular imports.
         from .backend.engines.neurosymbolic import ENGINE_MAPPING  # noqa
-        from .backend.engines.neurosymbolic.engine_cerebras import CerebrasEngine  # noqa
+        from .backend.engines.search import SEARCH_ENGINE_MAPPING  # noqa
 
         try:
+            # Check neurosymbolic engines first
             engine_class = ENGINE_MAPPING.get(self.model)
-            if engine_class is None and self.model.startswith("cerebras:"):
-                engine_class = CerebrasEngine
+
+            # Check search engines
+            if engine_class is None:
+                engine_class = SEARCH_ENGINE_MAPPING.get(self.model)
+                if engine_class is not None:
+                    return engine_class(api_key=self.api_key)
+
             if engine_class is None:
                 UserMessage(f"Unsupported model '{self.model}'", raise_with=ValueError)
             return engine_class(api_key=self.api_key, model=self.model)
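Net effect of this hunk: DynamicEngine now resolves a model name against the neurosymbolic ENGINE_MAPPING first and falls back to SEARCH_ENGINE_MAPPING, and search engines are constructed without a `model` kwarg. A condensed sketch of that lookup order (the example mapping keys are assumptions, not taken from this diff):

    from symai.backend.engines.neurosymbolic import ENGINE_MAPPING
    from symai.backend.engines.search import SEARCH_ENGINE_MAPPING

    def resolve(model: str, api_key: str):
        engine_class = ENGINE_MAPPING.get(model)         # e.g. "openrouter:moonshotai/kimi-k2.5"
        if engine_class is not None:
            return engine_class(api_key=api_key, model=model)
        engine_class = SEARCH_ENGINE_MAPPING.get(model)  # e.g. "firecrawl" (assumed key)
        if engine_class is not None:
            return engine_class(api_key=api_key)         # search engines take no model kwarg
        raise ValueError(f"Unsupported model '{model}'")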