symbolicai 0.21.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. symai/__init__.py +269 -173
  2. symai/backend/base.py +123 -110
  3. symai/backend/engines/drawing/engine_bfl.py +45 -44
  4. symai/backend/engines/drawing/engine_gpt_image.py +112 -97
  5. symai/backend/engines/embedding/engine_llama_cpp.py +63 -52
  6. symai/backend/engines/embedding/engine_openai.py +25 -21
  7. symai/backend/engines/execute/engine_python.py +19 -18
  8. symai/backend/engines/files/engine_io.py +104 -95
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +28 -24
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +102 -79
  11. symai/backend/engines/index/engine_pinecone.py +124 -97
  12. symai/backend/engines/index/engine_qdrant.py +1011 -0
  13. symai/backend/engines/index/engine_vectordb.py +84 -56
  14. symai/backend/engines/lean/engine_lean4.py +96 -52
  15. symai/backend/engines/neurosymbolic/__init__.py +41 -13
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +330 -248
  17. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +329 -264
  18. symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
  19. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +118 -88
  20. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +344 -299
  21. symai/backend/engines/neurosymbolic/engine_groq.py +173 -115
  22. symai/backend/engines/neurosymbolic/engine_huggingface.py +114 -84
  23. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +144 -118
  24. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +415 -307
  25. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +394 -231
  26. symai/backend/engines/ocr/engine_apilayer.py +23 -27
  27. symai/backend/engines/output/engine_stdout.py +10 -13
  28. symai/backend/engines/{webscraping → scrape}/engine_requests.py +101 -54
  29. symai/backend/engines/search/engine_openai.py +100 -88
  30. symai/backend/engines/search/engine_parallel.py +665 -0
  31. symai/backend/engines/search/engine_perplexity.py +44 -45
  32. symai/backend/engines/search/engine_serpapi.py +37 -34
  33. symai/backend/engines/speech_to_text/engine_local_whisper.py +54 -51
  34. symai/backend/engines/symbolic/engine_wolframalpha.py +15 -9
  35. symai/backend/engines/text_to_speech/engine_openai.py +20 -26
  36. symai/backend/engines/text_vision/engine_clip.py +39 -37
  37. symai/backend/engines/userinput/engine_console.py +5 -6
  38. symai/backend/mixin/__init__.py +13 -0
  39. symai/backend/mixin/anthropic.py +48 -38
  40. symai/backend/mixin/deepseek.py +6 -5
  41. symai/backend/mixin/google.py +7 -4
  42. symai/backend/mixin/groq.py +2 -4
  43. symai/backend/mixin/openai.py +140 -110
  44. symai/backend/settings.py +87 -20
  45. symai/chat.py +216 -123
  46. symai/collect/__init__.py +7 -1
  47. symai/collect/dynamic.py +80 -70
  48. symai/collect/pipeline.py +67 -51
  49. symai/collect/stats.py +161 -109
  50. symai/components.py +707 -360
  51. symai/constraints.py +24 -12
  52. symai/core.py +1857 -1233
  53. symai/core_ext.py +83 -80
  54. symai/endpoints/api.py +166 -104
  55. symai/extended/.DS_Store +0 -0
  56. symai/extended/__init__.py +46 -12
  57. symai/extended/api_builder.py +29 -21
  58. symai/extended/arxiv_pdf_parser.py +23 -14
  59. symai/extended/bibtex_parser.py +9 -6
  60. symai/extended/conversation.py +156 -126
  61. symai/extended/document.py +50 -30
  62. symai/extended/file_merger.py +57 -14
  63. symai/extended/graph.py +51 -32
  64. symai/extended/html_style_template.py +18 -14
  65. symai/extended/interfaces/blip_2.py +2 -3
  66. symai/extended/interfaces/clip.py +4 -3
  67. symai/extended/interfaces/console.py +9 -1
  68. symai/extended/interfaces/dall_e.py +4 -2
  69. symai/extended/interfaces/file.py +2 -0
  70. symai/extended/interfaces/flux.py +4 -2
  71. symai/extended/interfaces/gpt_image.py +16 -7
  72. symai/extended/interfaces/input.py +2 -1
  73. symai/extended/interfaces/llava.py +1 -2
  74. symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +4 -3
  75. symai/extended/interfaces/naive_vectordb.py +9 -10
  76. symai/extended/interfaces/ocr.py +5 -3
  77. symai/extended/interfaces/openai_search.py +2 -0
  78. symai/extended/interfaces/parallel.py +30 -0
  79. symai/extended/interfaces/perplexity.py +2 -0
  80. symai/extended/interfaces/pinecone.py +12 -9
  81. symai/extended/interfaces/python.py +2 -0
  82. symai/extended/interfaces/serpapi.py +3 -1
  83. symai/extended/interfaces/terminal.py +2 -4
  84. symai/extended/interfaces/tts.py +3 -2
  85. symai/extended/interfaces/whisper.py +3 -2
  86. symai/extended/interfaces/wolframalpha.py +2 -1
  87. symai/extended/metrics/__init__.py +11 -1
  88. symai/extended/metrics/similarity.py +14 -13
  89. symai/extended/os_command.py +39 -29
  90. symai/extended/packages/__init__.py +29 -3
  91. symai/extended/packages/symdev.py +51 -43
  92. symai/extended/packages/sympkg.py +41 -35
  93. symai/extended/packages/symrun.py +63 -50
  94. symai/extended/repo_cloner.py +14 -12
  95. symai/extended/seo_query_optimizer.py +15 -13
  96. symai/extended/solver.py +116 -91
  97. symai/extended/summarizer.py +12 -10
  98. symai/extended/taypan_interpreter.py +17 -18
  99. symai/extended/vectordb.py +122 -92
  100. symai/formatter/__init__.py +9 -1
  101. symai/formatter/formatter.py +51 -47
  102. symai/formatter/regex.py +70 -69
  103. symai/functional.py +325 -176
  104. symai/imports.py +190 -147
  105. symai/interfaces.py +57 -28
  106. symai/memory.py +45 -35
  107. symai/menu/screen.py +28 -19
  108. symai/misc/console.py +66 -56
  109. symai/misc/loader.py +8 -5
  110. symai/models/__init__.py +17 -1
  111. symai/models/base.py +395 -236
  112. symai/models/errors.py +1 -2
  113. symai/ops/__init__.py +32 -22
  114. symai/ops/measures.py +24 -25
  115. symai/ops/primitives.py +1149 -731
  116. symai/post_processors.py +58 -50
  117. symai/pre_processors.py +86 -82
  118. symai/processor.py +21 -13
  119. symai/prompts.py +764 -685
  120. symai/server/huggingface_server.py +135 -49
  121. symai/server/llama_cpp_server.py +21 -11
  122. symai/server/qdrant_server.py +206 -0
  123. symai/shell.py +100 -42
  124. symai/shellsv.py +700 -492
  125. symai/strategy.py +630 -346
  126. symai/symbol.py +368 -322
  127. symai/utils.py +100 -78
  128. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +22 -10
  129. symbolicai-1.1.0.dist-info/RECORD +168 -0
  130. symbolicai-0.21.0.dist-info/RECORD +0 -162
  131. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
  132. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
  133. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
  134. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
@@ -2,20 +2,24 @@ import json
2
2
  import logging
3
3
  import re
4
4
  from copy import copy, deepcopy
5
- from typing import List, Optional
6
5
 
7
6
  import anthropic
8
7
  from anthropic._types import NOT_GIVEN
9
- from anthropic.types import (InputJSONDelta, Message,
10
- RawContentBlockDeltaEvent,
11
- RawContentBlockStartEvent,
12
- RawContentBlockStopEvent, TextBlock, TextDelta,
13
- ThinkingBlock, ThinkingDelta, ToolUseBlock)
8
+ from anthropic.types import (
9
+ InputJSONDelta,
10
+ Message,
11
+ RawContentBlockDeltaEvent,
12
+ RawContentBlockStartEvent,
13
+ RawContentBlockStopEvent,
14
+ TextBlock,
15
+ TextDelta,
16
+ ThinkingBlock,
17
+ ThinkingDelta,
18
+ ToolUseBlock,
19
+ )
14
20
 
15
21
  from ....components import SelfPrompt
16
- from ....misc.console import ConsoleStyle
17
- from ....symbol import Symbol
18
- from ....utils import CustomUserWarning, encode_media_frames
22
+ from ....utils import UserMessage, encode_media_frames
19
23
  from ...base import Engine
20
24
  from ...mixin.anthropic import AnthropicMixin
21
25
  from ...settings import SYMAI_CONFIG
@@ -34,18 +38,19 @@ class TokenizerWrapper:
34
38
  def encode(self, text: str) -> int:
35
39
  return self.compute_tokens_func([{"role": "user", "content": text}])
36
40
 
41
+
37
42
  class ClaudeXReasoningEngine(Engine, AnthropicMixin):
38
- def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None):
43
+ def __init__(self, api_key: str | None = None, model: str | None = None):
39
44
  super().__init__()
40
45
  self.config = deepcopy(SYMAI_CONFIG)
41
46
  # In case we use EngineRepository.register to inject the api_key and model => dynamically change the engine at runtime
42
47
  if api_key is not None and model is not None:
43
- self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] = api_key
44
- self.config['NEUROSYMBOLIC_ENGINE_MODEL'] = model
45
- if self.id() != 'neurosymbolic':
46
- return # do not initialize if not neurosymbolic; avoids conflict with llama.cpp check in EngineRepository.register_from_package
47
- anthropic.api_key = self.config['NEUROSYMBOLIC_ENGINE_API_KEY']
48
- self.model = self.config['NEUROSYMBOLIC_ENGINE_MODEL']
48
+ self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] = api_key
49
+ self.config["NEUROSYMBOLIC_ENGINE_MODEL"] = model
50
+ if self.id() != "neurosymbolic":
51
+ return # do not initialize if not neurosymbolic; avoids conflict with llama.cpp check in EngineRepository.register_from_package
52
+ anthropic.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]
53
+ self.model = self.config["NEUROSYMBOLIC_ENGINE_MODEL"]
49
54
  self.name = self.__class__.__name__
50
55
  self.tokenizer = TokenizerWrapper(self.compute_required_tokens)
51
56
  self.max_context_tokens = self.api_max_context_tokens()
@@ -53,95 +58,99 @@ class ClaudeXReasoningEngine(Engine, AnthropicMixin):
53
58
  self.client = anthropic.Anthropic(api_key=anthropic.api_key)
54
59
 
55
60
  def id(self) -> str:
56
- if self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
57
- self.config.get('NEUROSYMBOLIC_ENGINE_MODEL').startswith('claude') and \
58
- ('3-7' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
59
- '4-0' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
60
- '4-1' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
61
- '4-5' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
62
- return 'neurosymbolic'
63
- return super().id() # default to unregistered
61
+ if (
62
+ self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
63
+ and self.config.get("NEUROSYMBOLIC_ENGINE_MODEL").startswith("claude")
64
+ and (
65
+ "3-7" in self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
66
+ or "4-0" in self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
67
+ or "4-1" in self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
68
+ or "4-5" in self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
69
+ )
70
+ ):
71
+ return "neurosymbolic"
72
+ return super().id() # default to unregistered
64
73
 
65
74
  def command(self, *args, **kwargs):
66
75
  super().command(*args, **kwargs)
67
- if 'NEUROSYMBOLIC_ENGINE_API_KEY' in kwargs:
68
- anthropic.api_key = kwargs['NEUROSYMBOLIC_ENGINE_API_KEY']
69
- if 'NEUROSYMBOLIC_ENGINE_MODEL' in kwargs:
70
- self.model = kwargs['NEUROSYMBOLIC_ENGINE_MODEL']
76
+ if "NEUROSYMBOLIC_ENGINE_API_KEY" in kwargs:
77
+ anthropic.api_key = kwargs["NEUROSYMBOLIC_ENGINE_API_KEY"]
78
+ if "NEUROSYMBOLIC_ENGINE_MODEL" in kwargs:
79
+ self.model = kwargs["NEUROSYMBOLIC_ENGINE_MODEL"]
71
80
 
72
81
  def compute_required_tokens(self, messages) -> int:
73
- claude_messages = []
74
- system_content = None
75
-
76
- for msg in messages:
77
- if not isinstance(msg, list):
78
- msg = [msg]
79
- for part in msg:
80
- if isinstance(part, str):
81
- role = 'user'
82
- content_str = part
83
- elif isinstance(part, dict):
84
- role = part.get('role')
85
- content_str = str(part.get('content', ''))
86
- else:
87
- CustomUserWarning(f"Unsupported message part type: {type(part)}", raise_with=ValueError)
88
-
89
- if role == 'system':
90
- system_content = content_str
91
- continue
92
-
93
- if role in ['user', 'assistant']:
94
- message_content = []
95
-
96
- image_content = self._handle_image_content(content_str)
97
- message_content.extend(image_content)
98
-
99
- text_content = self._remove_vision_pattern(content_str)
100
- if text_content:
101
- message_content.append({
102
- "type": "text",
103
- "text": text_content
104
- })
105
-
106
- if message_content:
107
- if len(message_content) == 1 and message_content[0].get('type') == 'text':
108
- claude_messages.append({
109
- 'role': role,
110
- 'content': message_content[0]['text']
111
- })
112
- else:
113
- claude_messages.append({
114
- 'role': role,
115
- 'content': message_content
116
- })
82
+ claude_messages, system_content = self._normalize_messages_for_claude(messages)
117
83
 
118
84
  if not claude_messages:
119
85
  return 0
120
86
 
121
87
  try:
122
- count_params = {
123
- 'model': self.model,
124
- 'messages': claude_messages
125
- }
88
+ count_params = {"model": self.model, "messages": claude_messages}
126
89
  if system_content:
127
- count_params['system'] = system_content
90
+ count_params["system"] = system_content
128
91
  count_response = self.client.messages.count_tokens(**count_params)
129
92
  return count_response.input_tokens
130
93
  except Exception as e:
131
- logging.error(f"Claude count_tokens failed: {e}")
132
- CustomUserWarning(f"Error counting tokens for Claude: {str(e)}", raise_with=RuntimeError)
94
+ UserMessage(f"Claude count_tokens failed: {e}")
95
+ UserMessage(f"Error counting tokens for Claude: {e!s}", raise_with=RuntimeError)
96
+
97
+ def _normalize_messages_for_claude(self, messages):
98
+ claude_messages = []
99
+ system_content = None
100
+
101
+ for msg in messages:
102
+ msg_parts = msg if isinstance(msg, list) else [msg]
103
+ for part in msg_parts:
104
+ role, content_str = self._extract_role_and_content(part)
105
+ if role == "system":
106
+ system_content = content_str
107
+ continue
108
+
109
+ if role in ["user", "assistant"]:
110
+ message_payload = self._build_message_payload(role, content_str)
111
+ if message_payload:
112
+ claude_messages.append(message_payload)
133
113
 
134
- def compute_remaining_tokens(self, prompts: list) -> int:
135
- CustomUserWarning('Method not implemented.', raise_with=NotImplementedError)
114
+ return claude_messages, system_content
115
+
116
+ def _extract_role_and_content(self, part):
117
+ if isinstance(part, str):
118
+ return "user", part
119
+ if isinstance(part, dict):
120
+ return part.get("role"), str(part.get("content", ""))
121
+ UserMessage(f"Unsupported message part type: {type(part)}", raise_with=ValueError)
122
+ return None, ""
123
+
124
+ def _build_message_payload(self, role, content_str):
125
+ message_content = []
126
+
127
+ image_content = self._handle_image_content(content_str)
128
+ message_content.extend(image_content)
129
+
130
+ text_content = self._remove_vision_pattern(content_str)
131
+ if text_content:
132
+ message_content.append({"type": "text", "text": text_content})
133
+
134
+ if not message_content:
135
+ return None
136
+
137
+ if len(message_content) == 1 and message_content[0].get("type") == "text":
138
+ return {"role": role, "content": message_content[0]["text"]}
139
+
140
+ return {"role": role, "content": message_content}
141
+
142
+ def compute_remaining_tokens(self, _prompts: list) -> int:
143
+ UserMessage("Method not implemented.", raise_with=NotImplementedError)
136
144
 
137
145
  def _handle_image_content(self, content: str) -> list:
138
146
  """Handle image content by processing vision patterns and returning image file data."""
147
+
139
148
  def extract_pattern(text):
140
- pattern = r'<<vision:(.*?):>>'
149
+ pattern = r"<<vision:(.*?):>>"
141
150
  return re.findall(pattern, text)
142
151
 
143
152
  image_files = []
144
- if '<<vision:' in content:
153
+ if "<<vision:" in content:
145
154
  parts = extract_pattern(content)
146
155
  for p in parts:
147
156
  img_ = p.strip()
@@ -149,108 +158,131 @@ class ClaudeXReasoningEngine(Engine, AnthropicMixin):
149
158
  max_used_frames = 10
150
159
  buffer, ext = encode_media_frames(img_)
151
160
  if len(buffer) > 1:
152
- step = len(buffer) // max_frames_spacing # max frames spacing
161
+ step = len(buffer) // max_frames_spacing # max frames spacing
153
162
  frames = []
154
163
  indices = list(range(0, len(buffer), step))[:max_used_frames]
155
164
  for i in indices:
156
- frames.append({'data': buffer[i], 'media_type': f'image/{ext}', 'type': 'base64'})
165
+ frames.append(
166
+ {"data": buffer[i], "media_type": f"image/{ext}", "type": "base64"}
167
+ )
157
168
  image_files.extend(frames)
158
169
  elif len(buffer) == 1:
159
- image_files.append({'data': buffer[0], 'media_type': f'image/{ext}', 'type': 'base64'})
170
+ image_files.append(
171
+ {"data": buffer[0], "media_type": f"image/{ext}", "type": "base64"}
172
+ )
160
173
  else:
161
- CustomUserWarning(f'No frames found for image!')
174
+ UserMessage("No frames found for image!")
162
175
  return image_files
163
176
 
164
177
  def _remove_vision_pattern(self, text: str) -> str:
165
178
  """Remove vision patterns from text."""
166
- pattern = r'<<vision:(.*?):>>'
167
- return re.sub(pattern, '', text)
179
+ pattern = r"<<vision:(.*?):>>"
180
+ return re.sub(pattern, "", text)
168
181
 
169
182
  def forward(self, argument):
170
183
  kwargs = argument.kwargs
171
184
  system, messages = argument.prop.prepared_input
172
185
  payload = self._prepare_request_payload(argument)
173
- except_remedy = kwargs.get('except_remedy')
186
+ except_remedy = kwargs.get("except_remedy")
174
187
 
175
188
  try:
176
- res = self.client.messages.create(
177
- system=system,
178
- messages=messages,
179
- **payload
180
- )
189
+ res = self.client.messages.create(system=system, messages=messages, **payload)
181
190
  except Exception as e:
182
- if anthropic.api_key is None or anthropic.api_key == '':
183
- msg = 'Anthropic API key is not set. Please set it in the config file or pass it as an argument to the command method.'
184
- logging.error(msg)
185
- if self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] is None or self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] == '':
186
- CustomUserWarning(msg, raise_with=ValueError)
187
- anthropic.api_key = self.config['NEUROSYMBOLIC_ENGINE_API_KEY']
191
+ if anthropic.api_key is None or anthropic.api_key == "":
192
+ msg = "Anthropic API key is not set. Please set it in the config file or pass it as an argument to the command method."
193
+ UserMessage(msg)
194
+ if (
195
+ self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] is None
196
+ or self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] == ""
197
+ ):
198
+ UserMessage(msg, raise_with=ValueError)
199
+ anthropic.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]
188
200
 
189
201
  callback = self.client.messages.create
190
- kwargs['model'] = kwargs['model'] if 'model' in kwargs else self.model
202
+ kwargs["model"] = kwargs.get("model", self.model)
191
203
 
192
204
  if except_remedy is not None:
193
205
  res = except_remedy(self, e, callback, argument)
194
206
  else:
195
- CustomUserWarning(f'Error during generation. Caused by: {e}', raise_with=ValueError)
207
+ UserMessage(f"Error during generation. Caused by: {e}", raise_with=ValueError)
196
208
 
197
- if payload['stream']:
198
- res = [_ for _ in res] # Unpack the iterator to a list
199
- metadata = {'raw_output': res}
209
+ if payload["stream"]:
210
+ res = list(res) # Unpack the iterator to a list
211
+ metadata = {"raw_output": res}
200
212
  response_data = self._collect_response(res)
201
213
 
202
- if response_data.get('function_call'):
203
- metadata['function_call'] = response_data['function_call']
214
+ if response_data.get("function_call"):
215
+ metadata["function_call"] = response_data["function_call"]
204
216
 
205
- if response_data.get('thinking') and len(response_data['thinking']) > 0:
206
- metadata['thinking'] = response_data['thinking']
217
+ if response_data.get("thinking") and len(response_data["thinking"]) > 0:
218
+ metadata["thinking"] = response_data["thinking"]
207
219
 
208
- text_output = response_data.get('text', '')
220
+ text_output = response_data.get("text", "")
209
221
  if argument.prop.response_format:
210
222
  # Anthropic returns JSON in markdown format
211
- text_output = text_output.replace('```json', '').replace('```', '')
223
+ text_output = text_output.replace("```json", "").replace("```", "")
212
224
 
213
225
  return [text_output], metadata
214
226
 
215
227
  def _prepare_raw_input(self, argument):
216
228
  if not argument.prop.processed_input:
217
- raise ValueError('Need to provide a prompt instruction to the engine if `raw_input` is enabled!')
229
+ msg = "Need to provide a prompt instruction to the engine if `raw_input` is enabled!"
230
+ UserMessage(msg)
231
+ raise ValueError(msg)
218
232
  system = NOT_GIVEN
219
233
  prompt = copy(argument.prop.processed_input)
220
- if type(prompt) != list:
221
- if type(prompt) != dict:
222
- prompt = {'role': 'user', 'content': str(prompt)}
234
+ if not isinstance(prompt, list):
235
+ if not isinstance(prompt, dict):
236
+ prompt = {"role": "user", "content": str(prompt)}
223
237
  prompt = [prompt]
224
238
  if len(prompt) > 1:
225
239
  # assert there are not more than 1 system instruction
226
- assert len([p for p in prompt if p['role'] == 'system']) <= 1, 'Only one system instruction is allowed!'
240
+ assert len([p for p in prompt if p["role"] == "system"]) <= 1, (
241
+ "Only one system instruction is allowed!"
242
+ )
227
243
  for p in prompt:
228
- if p['role'] == 'system':
229
- system = p['content']
244
+ if p["role"] == "system":
245
+ system = p["content"]
230
246
  prompt.remove(p)
231
247
  break
232
248
  return system, prompt
233
249
 
234
250
  def prepare(self, argument):
235
- #@NOTE: OpenAI compatibility at high level
251
+ # @NOTE: OpenAI compatibility at high level
236
252
  if argument.prop.raw_input:
237
253
  argument.prop.prepared_input = self._prepare_raw_input(argument)
238
254
  return
239
255
 
240
256
  _non_verbose_output = """<META_INSTRUCTION/>\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n\n"""
241
- user: str = ""
242
- system: str = ""
257
+ image_files = self._handle_image_content(str(argument.prop.processed_input))
258
+ system = self._build_system_prompt(argument, _non_verbose_output, image_files)
259
+ user_text = self._build_user_text(argument, image_files)
260
+
261
+ if not user_text:
262
+ # Anthropic doesn't allow empty user prompts; force it
263
+ user_text = "N/A"
264
+
265
+ system, user_prompt = self._apply_self_prompt_if_needed(
266
+ argument, system, user_text, image_files
267
+ )
268
+
269
+ argument.prop.prepared_input = (system, [user_prompt])
270
+
271
+ def _build_system_prompt(self, argument, non_verbose_output, image_files):
272
+ system = ""
243
273
 
244
274
  if argument.prop.suppress_verbose_output:
245
- system += _non_verbose_output
246
- system = f'{system}\n' if system and len(system) > 0 else ''
275
+ system = f"{non_verbose_output}\n"
247
276
 
248
277
  if argument.prop.response_format:
249
- _rsp_fmt = argument.prop.response_format
250
- if not (_rsp_fmt.get('type') is not None):
251
- CustomUserWarning('Response format type is required! Expected format `{"type": "json_object"}` or other supported types. Refer to Anthropic documentation for details.', raise_with=AssertionError)
252
- system += _non_verbose_output
253
- system += f'<RESPONSE_FORMAT/>\n{_rsp_fmt["type"]}\n\n'
278
+ response_format = argument.prop.response_format
279
+ if not (response_format.get("type") is not None):
280
+ UserMessage(
281
+ 'Response format type is required! Expected format `{"type": "json_object"}` or other supported types. Refer to Anthropic documentation for details.',
282
+ raise_with=AssertionError,
283
+ )
284
+ system += non_verbose_output
285
+ system += f"<RESPONSE_FORMAT/>\n{response_format['type']}\n\n"
254
286
 
255
287
  ref = argument.prop.instance
256
288
  static_ctxt, dyn_ctxt = ref.global_context
@@ -261,94 +293,89 @@ class ClaudeXReasoningEngine(Engine, AnthropicMixin):
261
293
  system += f"<DYNAMIC_CONTEXT/>\n{dyn_ctxt}\n\n"
262
294
 
263
295
  payload = argument.prop.payload
264
- if argument.prop.payload:
265
- system += f"<ADDITIONAL_CONTEXT/>\n{str(payload)}\n\n"
296
+ if payload:
297
+ system += f"<ADDITIONAL_CONTEXT/>\n{payload!s}\n\n"
266
298
 
267
- examples: List[str] = argument.prop.examples
299
+ examples: list[str] = argument.prop.examples
268
300
  if examples and len(examples) > 0:
269
- system += f"<EXAMPLES/>\n{str(examples)}\n\n"
270
-
271
- image_files = self._handle_image_content(str(argument.prop.processed_input))
301
+ system += f"<EXAMPLES/>\n{examples!s}\n\n"
272
302
 
273
303
  if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
274
- val = str(argument.prop.prompt)
304
+ value = str(argument.prop.prompt)
275
305
  if len(image_files) > 0:
276
- val = self._remove_vision_pattern(val)
277
- system += f"<INSTRUCTION/>\n{val}\n\n"
306
+ value = self._remove_vision_pattern(value)
307
+ system += f"<INSTRUCTION/>\n{value}\n\n"
308
+
309
+ return self._append_template_suffix(system, argument.prop.template_suffix)
278
310
 
279
- suffix: str = str(argument.prop.processed_input)
311
+ def _build_user_text(self, argument, image_files):
312
+ suffix = str(argument.prop.processed_input)
280
313
  if len(image_files) > 0:
281
314
  suffix = self._remove_vision_pattern(suffix)
315
+ return suffix
282
316
 
283
- user += f"{suffix}"
317
+ def _append_template_suffix(self, system, template_suffix):
318
+ if template_suffix:
319
+ return system + (
320
+ f" You will only generate content for the placeholder `{template_suffix!s}` "
321
+ "following the instructions and the provided context information.\n\n"
322
+ )
323
+ return system
284
324
 
285
- if not len(user):
286
- # Anthropic doesn't allow empty user prompts; force it
287
- user = "N/A"
325
+ def _apply_self_prompt_if_needed(self, argument, system, user_text, image_files):
326
+ if not self._is_self_prompt_enabled(argument):
327
+ return system, self._format_user_prompt(user_text, image_files)
288
328
 
289
- if argument.prop.template_suffix:
290
- system += f' You will only generate content for the placeholder `{str(argument.prop.template_suffix)}` following the instructions and the provided context information.\n\n'
329
+ self_prompter = SelfPrompt()
330
+ response = self_prompter(
331
+ {"user": user_text, "system": system},
332
+ max_tokens=argument.kwargs.get("max_tokens", self.max_response_tokens),
333
+ thinking=argument.kwargs.get("thinking", NOT_GIVEN),
334
+ )
335
+ if response is None:
336
+ UserMessage("Self-prompting failed to return a response.", raise_with=ValueError)
291
337
 
292
- if len(image_files) > 0:
293
- images = [{ 'type': 'image', "source": im } for im in image_files]
294
- user_prompt = { "role": "user", "content": [
295
- *images,
296
- { 'type': 'text', 'text': user }
297
- ]}
298
- else:
299
- user_prompt = { "role": "user", "content": user }
300
-
301
- # First check if the `Symbol` instance has the flag set, otherwise check if it was passed as an argument to a method
302
- if argument.prop.instance._kwargs.get('self_prompt', False) or argument.prop.self_prompt:
303
- self_prompter = SelfPrompt()
304
-
305
- res = self_prompter(
306
- {'user': user, 'system': system},
307
- max_tokens=argument.kwargs.get('max_tokens', self.max_response_tokens),
308
- thinking=argument.kwargs.get('thinking', NOT_GIVEN),
309
- )
310
- if res is None:
311
- CustomUserWarning("Self-prompting failed to return a response.", raise_with=ValueError)
338
+ updated_prompt = self._format_user_prompt(response["user"], image_files)
339
+ return response["system"], updated_prompt
312
340
 
313
- if len(image_files) > 0:
314
- user_prompt = { "role": "user", "content": [
315
- *images,
316
- { 'type': 'text', 'text': res['user'] }
317
- ]}
318
- else:
319
- user_prompt = { "role": "user", "content": res['user'] }
341
+ def _is_self_prompt_enabled(self, argument):
342
+ return argument.prop.instance._kwargs.get("self_prompt", False) or argument.prop.self_prompt
320
343
 
321
- system = res['system']
344
+ def _format_user_prompt(self, user_text, image_files):
345
+ if len(image_files) > 0:
346
+ images = [{"type": "image", "source": im} for im in image_files]
347
+ return {"role": "user", "content": [*images, {"type": "text", "text": user_text}]}
322
348
 
323
- argument.prop.prepared_input = (system, [user_prompt])
349
+ return {"role": "user", "content": user_text}
324
350
 
325
351
  def _prepare_request_payload(self, argument):
326
352
  kwargs = argument.kwargs
327
- model = kwargs.get('model', self.model)
328
- stop = kwargs.get('stop', NOT_GIVEN)
329
- temperature = kwargs.get('temperature', 1)
330
- thinking_arg = kwargs.get('thinking', NOT_GIVEN)
353
+ model = kwargs.get("model", self.model)
354
+ stop = kwargs.get("stop", NOT_GIVEN)
355
+ temperature = kwargs.get("temperature", 1)
356
+ thinking_arg = kwargs.get("thinking", NOT_GIVEN)
331
357
  thinking = NOT_GIVEN
332
358
  if thinking_arg and isinstance(thinking_arg, dict):
333
- thinking = {
334
- "type": "enabled",
335
- "budget_tokens": thinking_arg.get("budget_tokens", 1024)
336
- }
337
- top_p = kwargs.get('top_p', NOT_GIVEN if temperature is not None else 1) #@NOTE:'You should either alter temperature or top_p, but not both.'
338
- top_k = kwargs.get('top_k', NOT_GIVEN)
339
- stream = kwargs.get('stream', True) # Do NOT remove this default value! Getting tons of API errors because they can't process requests >10m
340
- tools = kwargs.get('tools', NOT_GIVEN)
341
- tool_choice = kwargs.get('tool_choice', NOT_GIVEN)
342
- metadata_anthropic = kwargs.get('metadata', NOT_GIVEN)
343
- max_tokens = kwargs.get('max_tokens', self.max_response_tokens)
344
-
345
- if stop != NOT_GIVEN and type(stop) != list:
359
+ thinking = {"type": "enabled", "budget_tokens": thinking_arg.get("budget_tokens", 1024)}
360
+ top_p = kwargs.get(
361
+ "top_p", NOT_GIVEN if temperature is not None else 1
362
+ ) # @NOTE:'You should either alter temperature or top_p, but not both.'
363
+ top_k = kwargs.get("top_k", NOT_GIVEN)
364
+ stream = kwargs.get(
365
+ "stream", True
366
+ ) # Do NOT remove this default value! Getting tons of API errors because they can't process requests >10m
367
+ tools = kwargs.get("tools", NOT_GIVEN)
368
+ tool_choice = kwargs.get("tool_choice", NOT_GIVEN)
369
+ metadata_anthropic = kwargs.get("metadata", NOT_GIVEN)
370
+ max_tokens = kwargs.get("max_tokens", self.max_response_tokens)
371
+
372
+ if stop != NOT_GIVEN and not isinstance(stop, list):
346
373
  stop = [stop]
347
374
 
348
- #@NOTE: Anthropic fails if stop is not raw string, so cast it to r'…'
375
+ # @NOTE: Anthropic fails if stop is not raw string, so cast it to r'…'
349
376
  # E.g. when we use defaults in core.py, i.e. stop=['\n']
350
377
  if stop != NOT_GIVEN:
351
- stop = [r'{s}' for s in stop]
378
+ stop = [r"{s}" for s in stop]
352
379
 
353
380
  return {
354
381
  "model": model,
@@ -361,82 +388,120 @@ class ClaudeXReasoningEngine(Engine, AnthropicMixin):
361
388
  "stream": stream,
362
389
  "metadata": metadata_anthropic,
363
390
  "tools": tools,
364
- "tool_choice": tool_choice
391
+ "tool_choice": tool_choice,
365
392
  }
366
393
 
367
394
  def _collect_response(self, res):
368
395
  if isinstance(res, list):
369
- thinking_content = ''
370
- text_content = ''
371
- tool_calls_raw = []
372
- active_tool_calls = {}
373
-
374
- for chunk in res:
375
- if isinstance(chunk, RawContentBlockStartEvent):
376
- if isinstance(chunk.content_block, ToolUseBlock):
377
- active_tool_calls[chunk.index] = {
378
- 'id': chunk.content_block.id,
379
- 'name': chunk.content_block.name,
380
- 'input_json_str': ""
381
- }
382
- elif isinstance(chunk, RawContentBlockDeltaEvent):
383
- if isinstance(chunk.delta, ThinkingDelta):
384
- thinking_content += chunk.delta.thinking
385
- elif isinstance(chunk.delta, TextDelta):
386
- text_content += chunk.delta.text
387
- elif isinstance(chunk.delta, InputJSONDelta):
388
- if chunk.index in active_tool_calls:
389
- active_tool_calls[chunk.index]['input_json_str'] += chunk.delta.partial_json
390
- elif isinstance(chunk, RawContentBlockStopEvent):
391
- if chunk.index in active_tool_calls:
392
- tool_call_info = active_tool_calls.pop(chunk.index)
393
- try:
394
- tool_call_info['input'] = json.loads(tool_call_info['input_json_str'])
395
- except json.JSONDecodeError as e:
396
- logging.error(f"Failed to parse JSON for tool call {tool_call_info['name']}: {e}. Raw JSON: '{tool_call_info['input_json_str']}'")
397
- tool_call_info['input'] = {}
398
- tool_calls_raw.append(tool_call_info)
399
-
400
- function_call_data = None
401
- if tool_calls_raw:
402
- if len(tool_calls_raw) > 1:
403
- CustomUserWarning("Multiple tool calls detected in the stream but only the first one will be processed.")
404
- function_call_data = {
405
- 'name': tool_calls_raw[0]['name'],
406
- 'arguments': tool_calls_raw[0]['input']
407
- }
408
-
409
- return {
410
- "thinking": thinking_content,
411
- "text": text_content,
412
- "function_call": function_call_data
413
- }
396
+ return self._collect_stream_response(res)
414
397
 
415
- # Non-streamed response (res is a Message object)
416
398
  if isinstance(res, Message):
417
- thinking_content = ''
418
- text_content = ''
419
- function_call_data = None
420
- hit = False
421
-
422
- for content_block in res.content:
423
- if isinstance(content_block, ThinkingBlock):
424
- thinking_content += content_block.thinking
425
- elif isinstance(content_block, TextBlock):
426
- text_content += content_block.text
427
- elif isinstance(content_block, ToolUseBlock):
428
- if hit:
429
- CustomUserWarning("Multiple tool use blocks detected in the response but only the first one will be processed.")
430
- else:
431
- function_call_data = {
432
- 'name': content_block.name,
433
- 'arguments': content_block.input
434
- }
435
- hit = True
436
- return {
437
- "thinking": thinking_content,
438
- "text": text_content,
439
- "function_call": function_call_data
399
+ return self._collect_message_response(res)
400
+
401
+ UserMessage(
402
+ f"Unexpected response type from Anthropic API: {type(res)}", raise_with=ValueError
403
+ )
404
+ return {}
405
+
406
+ def _collect_stream_response(self, response_chunks):
407
+ accumulators = {"thinking": "", "text": ""}
408
+ tool_calls_raw = []
409
+ active_tool_calls = {}
410
+
411
+ for chunk in response_chunks:
412
+ self._process_stream_chunk(chunk, accumulators, active_tool_calls, tool_calls_raw)
413
+
414
+ function_call_data = self._extract_function_call(tool_calls_raw)
415
+ return {
416
+ "thinking": accumulators["thinking"],
417
+ "text": accumulators["text"],
418
+ "function_call": function_call_data,
419
+ }
420
+
421
+ def _process_stream_chunk(self, chunk, accumulators, active_tool_calls, tool_calls_raw):
422
+ if isinstance(chunk, RawContentBlockStartEvent):
423
+ self._register_tool_call(chunk, active_tool_calls)
424
+ elif isinstance(chunk, RawContentBlockDeltaEvent):
425
+ self._handle_delta_chunk(chunk, accumulators, active_tool_calls)
426
+ elif isinstance(chunk, RawContentBlockStopEvent):
427
+ self._finalize_tool_call(chunk, active_tool_calls, tool_calls_raw)
428
+
429
+ def _register_tool_call(self, chunk, active_tool_calls):
430
+ if isinstance(chunk.content_block, ToolUseBlock):
431
+ active_tool_calls[chunk.index] = {
432
+ "id": chunk.content_block.id,
433
+ "name": chunk.content_block.name,
434
+ "input_json_str": "",
440
435
  }
441
436
 
442
- CustomUserWarning(f"Unexpected response type from Anthropic API: {type(res)}", raise_with=ValueError)
437
+ def _handle_delta_chunk(self, chunk, accumulators, active_tool_calls):
438
+ if isinstance(chunk.delta, ThinkingDelta):
439
+ accumulators["thinking"] += chunk.delta.thinking
440
+ elif isinstance(chunk.delta, TextDelta):
441
+ accumulators["text"] += chunk.delta.text
442
+ elif isinstance(chunk.delta, InputJSONDelta) and chunk.index in active_tool_calls:
443
+ active_tool_calls[chunk.index]["input_json_str"] += chunk.delta.partial_json
444
+
445
+ def _finalize_tool_call(self, chunk, active_tool_calls, tool_calls_raw):
446
+ if chunk.index not in active_tool_calls:
447
+ return
448
+
449
+ tool_call_info = active_tool_calls.pop(chunk.index)
450
+ try:
451
+ tool_call_info["input"] = json.loads(tool_call_info["input_json_str"])
452
+ except json.JSONDecodeError as error:
453
+ UserMessage(
454
+ f"Failed to parse JSON for tool call {tool_call_info['name']}: {error}. Raw JSON: '{tool_call_info['input_json_str']}'"
455
+ )
456
+ tool_call_info["input"] = {}
457
+ tool_calls_raw.append(tool_call_info)
458
+
459
+ def _extract_function_call(self, tool_calls_raw):
460
+ if not tool_calls_raw:
461
+ return None
462
+
463
+ if len(tool_calls_raw) > 1:
464
+ UserMessage(
465
+ "Multiple tool calls detected in the stream but only the first one will be processed."
466
+ )
467
+
468
+ first_call = tool_calls_raw[0]
469
+ return {"name": first_call["name"], "arguments": first_call["input"]}
470
+
471
+ def _collect_message_response(self, message):
472
+ accumulators = {"thinking": "", "text": ""}
473
+ function_call_data = None
474
+ tool_call_detected = False
475
+
476
+ for content_block in message.content:
477
+ function_call_data, tool_call_detected = self._process_message_block(
478
+ content_block, accumulators, function_call_data, tool_call_detected
479
+ )
480
+
481
+ return {
482
+ "thinking": accumulators["thinking"],
483
+ "text": accumulators["text"],
484
+ "function_call": function_call_data,
485
+ }
486
+
487
+ def _process_message_block(
488
+ self, content_block, accumulators, function_call_data, tool_call_detected
489
+ ):
490
+ if isinstance(content_block, ThinkingBlock):
491
+ accumulators["thinking"] += content_block.thinking
492
+ return function_call_data, tool_call_detected
493
+
494
+ if isinstance(content_block, TextBlock):
495
+ accumulators["text"] += content_block.text
496
+ return function_call_data, tool_call_detected
497
+
498
+ if isinstance(content_block, ToolUseBlock):
499
+ if tool_call_detected:
500
+ UserMessage(
501
+ "Multiple tool use blocks detected in the response but only the first one will be processed."
502
+ )
503
+ return function_call_data, tool_call_detected
504
+
505
+ return {"name": content_block.name, "arguments": content_block.input}, True
506
+
507
+ return function_call_data, tool_call_detected