symbolicai-0.21.0-py3-none-any.whl → symbolicai-1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134):
  1. symai/__init__.py +269 -173
  2. symai/backend/base.py +123 -110
  3. symai/backend/engines/drawing/engine_bfl.py +45 -44
  4. symai/backend/engines/drawing/engine_gpt_image.py +112 -97
  5. symai/backend/engines/embedding/engine_llama_cpp.py +63 -52
  6. symai/backend/engines/embedding/engine_openai.py +25 -21
  7. symai/backend/engines/execute/engine_python.py +19 -18
  8. symai/backend/engines/files/engine_io.py +104 -95
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +28 -24
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +102 -79
  11. symai/backend/engines/index/engine_pinecone.py +124 -97
  12. symai/backend/engines/index/engine_qdrant.py +1011 -0
  13. symai/backend/engines/index/engine_vectordb.py +84 -56
  14. symai/backend/engines/lean/engine_lean4.py +96 -52
  15. symai/backend/engines/neurosymbolic/__init__.py +41 -13
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +330 -248
  17. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +329 -264
  18. symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
  19. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +118 -88
  20. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +344 -299
  21. symai/backend/engines/neurosymbolic/engine_groq.py +173 -115
  22. symai/backend/engines/neurosymbolic/engine_huggingface.py +114 -84
  23. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +144 -118
  24. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +415 -307
  25. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +394 -231
  26. symai/backend/engines/ocr/engine_apilayer.py +23 -27
  27. symai/backend/engines/output/engine_stdout.py +10 -13
  28. symai/backend/engines/{webscraping → scrape}/engine_requests.py +101 -54
  29. symai/backend/engines/search/engine_openai.py +100 -88
  30. symai/backend/engines/search/engine_parallel.py +665 -0
  31. symai/backend/engines/search/engine_perplexity.py +44 -45
  32. symai/backend/engines/search/engine_serpapi.py +37 -34
  33. symai/backend/engines/speech_to_text/engine_local_whisper.py +54 -51
  34. symai/backend/engines/symbolic/engine_wolframalpha.py +15 -9
  35. symai/backend/engines/text_to_speech/engine_openai.py +20 -26
  36. symai/backend/engines/text_vision/engine_clip.py +39 -37
  37. symai/backend/engines/userinput/engine_console.py +5 -6
  38. symai/backend/mixin/__init__.py +13 -0
  39. symai/backend/mixin/anthropic.py +48 -38
  40. symai/backend/mixin/deepseek.py +6 -5
  41. symai/backend/mixin/google.py +7 -4
  42. symai/backend/mixin/groq.py +2 -4
  43. symai/backend/mixin/openai.py +140 -110
  44. symai/backend/settings.py +87 -20
  45. symai/chat.py +216 -123
  46. symai/collect/__init__.py +7 -1
  47. symai/collect/dynamic.py +80 -70
  48. symai/collect/pipeline.py +67 -51
  49. symai/collect/stats.py +161 -109
  50. symai/components.py +707 -360
  51. symai/constraints.py +24 -12
  52. symai/core.py +1857 -1233
  53. symai/core_ext.py +83 -80
  54. symai/endpoints/api.py +166 -104
  55. symai/extended/.DS_Store +0 -0
  56. symai/extended/__init__.py +46 -12
  57. symai/extended/api_builder.py +29 -21
  58. symai/extended/arxiv_pdf_parser.py +23 -14
  59. symai/extended/bibtex_parser.py +9 -6
  60. symai/extended/conversation.py +156 -126
  61. symai/extended/document.py +50 -30
  62. symai/extended/file_merger.py +57 -14
  63. symai/extended/graph.py +51 -32
  64. symai/extended/html_style_template.py +18 -14
  65. symai/extended/interfaces/blip_2.py +2 -3
  66. symai/extended/interfaces/clip.py +4 -3
  67. symai/extended/interfaces/console.py +9 -1
  68. symai/extended/interfaces/dall_e.py +4 -2
  69. symai/extended/interfaces/file.py +2 -0
  70. symai/extended/interfaces/flux.py +4 -2
  71. symai/extended/interfaces/gpt_image.py +16 -7
  72. symai/extended/interfaces/input.py +2 -1
  73. symai/extended/interfaces/llava.py +1 -2
  74. symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +4 -3
  75. symai/extended/interfaces/naive_vectordb.py +9 -10
  76. symai/extended/interfaces/ocr.py +5 -3
  77. symai/extended/interfaces/openai_search.py +2 -0
  78. symai/extended/interfaces/parallel.py +30 -0
  79. symai/extended/interfaces/perplexity.py +2 -0
  80. symai/extended/interfaces/pinecone.py +12 -9
  81. symai/extended/interfaces/python.py +2 -0
  82. symai/extended/interfaces/serpapi.py +3 -1
  83. symai/extended/interfaces/terminal.py +2 -4
  84. symai/extended/interfaces/tts.py +3 -2
  85. symai/extended/interfaces/whisper.py +3 -2
  86. symai/extended/interfaces/wolframalpha.py +2 -1
  87. symai/extended/metrics/__init__.py +11 -1
  88. symai/extended/metrics/similarity.py +14 -13
  89. symai/extended/os_command.py +39 -29
  90. symai/extended/packages/__init__.py +29 -3
  91. symai/extended/packages/symdev.py +51 -43
  92. symai/extended/packages/sympkg.py +41 -35
  93. symai/extended/packages/symrun.py +63 -50
  94. symai/extended/repo_cloner.py +14 -12
  95. symai/extended/seo_query_optimizer.py +15 -13
  96. symai/extended/solver.py +116 -91
  97. symai/extended/summarizer.py +12 -10
  98. symai/extended/taypan_interpreter.py +17 -18
  99. symai/extended/vectordb.py +122 -92
  100. symai/formatter/__init__.py +9 -1
  101. symai/formatter/formatter.py +51 -47
  102. symai/formatter/regex.py +70 -69
  103. symai/functional.py +325 -176
  104. symai/imports.py +190 -147
  105. symai/interfaces.py +57 -28
  106. symai/memory.py +45 -35
  107. symai/menu/screen.py +28 -19
  108. symai/misc/console.py +66 -56
  109. symai/misc/loader.py +8 -5
  110. symai/models/__init__.py +17 -1
  111. symai/models/base.py +395 -236
  112. symai/models/errors.py +1 -2
  113. symai/ops/__init__.py +32 -22
  114. symai/ops/measures.py +24 -25
  115. symai/ops/primitives.py +1149 -731
  116. symai/post_processors.py +58 -50
  117. symai/pre_processors.py +86 -82
  118. symai/processor.py +21 -13
  119. symai/prompts.py +764 -685
  120. symai/server/huggingface_server.py +135 -49
  121. symai/server/llama_cpp_server.py +21 -11
  122. symai/server/qdrant_server.py +206 -0
  123. symai/shell.py +100 -42
  124. symai/shellsv.py +700 -492
  125. symai/strategy.py +630 -346
  126. symai/symbol.py +368 -322
  127. symai/utils.py +100 -78
  128. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +22 -10
  129. symbolicai-1.1.0.dist-info/RECORD +168 -0
  130. symbolicai-0.21.0.dist-info/RECORD +0 -162
  131. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
  132. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
  133. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
  134. {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
@@ -2,20 +2,22 @@ import json
2
2
  import logging
3
3
  import re
4
4
  from copy import copy, deepcopy
5
- from typing import List, Optional
6
5
 
7
6
  import anthropic
8
7
  from anthropic._types import NOT_GIVEN
9
- from anthropic.types import (InputJSONDelta, Message,
10
- RawContentBlockDeltaEvent,
11
- RawContentBlockStartEvent,
12
- RawContentBlockStopEvent, TextBlock, TextDelta,
13
- ToolUseBlock)
8
+ from anthropic.types import (
9
+ InputJSONDelta,
10
+ Message,
11
+ RawContentBlockDeltaEvent,
12
+ RawContentBlockStartEvent,
13
+ RawContentBlockStopEvent,
14
+ TextBlock,
15
+ TextDelta,
16
+ ToolUseBlock,
17
+ )
14
18
 
15
19
  from ....components import SelfPrompt
16
- from ....misc.console import ConsoleStyle
17
- from ....symbol import Symbol
18
- from ....utils import CustomUserWarning, encode_media_frames
20
+ from ....utils import UserMessage, encode_media_frames
19
21
  from ...base import Engine
20
22
  from ...mixin.anthropic import AnthropicMixin
21
23
  from ...settings import SYMAI_CONFIG
@@ -26,6 +28,7 @@ logging.getLogger("urllib").setLevel(logging.ERROR)
26
28
  logging.getLogger("httpx").setLevel(logging.ERROR)
27
29
  logging.getLogger("httpcore").setLevel(logging.ERROR)
28
30
 
31
+
29
32
  class TokenizerWrapper:
30
33
  def __init__(self, compute_tokens_func):
31
34
  self.compute_tokens_func = compute_tokens_func
@@ -33,18 +36,19 @@ class TokenizerWrapper:
33
36
  def encode(self, text: str) -> int:
34
37
  return self.compute_tokens_func([{"role": "user", "content": text}])
35
38
 
39
+
36
40
  class ClaudeXChatEngine(Engine, AnthropicMixin):
37
- def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None):
41
+ def __init__(self, api_key: str | None = None, model: str | None = None):
38
42
  super().__init__()
39
43
  self.config = deepcopy(SYMAI_CONFIG)
40
44
  # In case we use EngineRepository.register to inject the api_key and model => dynamically change the engine at runtime
41
45
  if api_key is not None and model is not None:
42
- self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] = api_key
43
- self.config['NEUROSYMBOLIC_ENGINE_MODEL'] = model
44
- if self.id() != 'neurosymbolic':
45
- return # do not initialize if not neurosymbolic; avoids conflict with llama.cpp check in EngineRepository.register_from_package
46
- anthropic.api_key = self.config['NEUROSYMBOLIC_ENGINE_API_KEY']
47
- self.model = self.config['NEUROSYMBOLIC_ENGINE_MODEL']
46
+ self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] = api_key
47
+ self.config["NEUROSYMBOLIC_ENGINE_MODEL"] = model
48
+ if self.id() != "neurosymbolic":
49
+ return # do not initialize if not neurosymbolic; avoids conflict with llama.cpp check in EngineRepository.register_from_package
50
+ anthropic.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]
51
+ self.model = self.config["NEUROSYMBOLIC_ENGINE_MODEL"]
48
52
  self.name = self.__class__.__name__
49
53
  self.tokenizer = TokenizerWrapper(self.compute_required_tokens)
50
54
  self.max_context_tokens = self.api_max_context_tokens()
@@ -52,95 +56,110 @@ class ClaudeXChatEngine(Engine, AnthropicMixin):
52
56
  self.client = anthropic.Anthropic(api_key=anthropic.api_key)
53
57
 
54
58
  def id(self) -> str:
55
- if self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
56
- self.config.get('NEUROSYMBOLIC_ENGINE_MODEL').startswith('claude') and \
57
- ('3-7' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
58
- '4-0' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
59
- '4-1' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
60
- '4-5' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
61
- return 'neurosymbolic'
62
- return super().id() # default to unregistered
59
+ if (
60
+ self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
61
+ and self.config.get("NEUROSYMBOLIC_ENGINE_MODEL").startswith("claude")
62
+ and (
63
+ "3-7" not in self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
64
+ and "4-0" not in self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
65
+ and "4-1" not in self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
66
+ and "4-5" not in self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
67
+ )
68
+ ):
69
+ return "neurosymbolic"
70
+ return super().id() # default to unregistered
63
71
 
64
72
  def command(self, *args, **kwargs):
65
73
  super().command(*args, **kwargs)
66
- if 'NEUROSYMBOLIC_ENGINE_API_KEY' in kwargs:
67
- anthropic.api_key = kwargs['NEUROSYMBOLIC_ENGINE_API_KEY']
68
- if 'NEUROSYMBOLIC_ENGINE_MODEL' in kwargs:
69
- self.model = kwargs['NEUROSYMBOLIC_ENGINE_MODEL']
74
+ if "NEUROSYMBOLIC_ENGINE_API_KEY" in kwargs:
75
+ anthropic.api_key = kwargs["NEUROSYMBOLIC_ENGINE_API_KEY"]
76
+ if "NEUROSYMBOLIC_ENGINE_MODEL" in kwargs:
77
+ self.model = kwargs["NEUROSYMBOLIC_ENGINE_MODEL"]
70
78
 
71
79
  def compute_required_tokens(self, messages) -> int:
80
+ claude_messages, system_content = self._build_claude_messages(messages)
81
+
82
+ if not claude_messages:
83
+ return 0
84
+
85
+ return self._count_claude_tokens(claude_messages, system_content)
86
+
87
+ def _build_claude_messages(self, messages):
72
88
  claude_messages = []
73
89
  system_content = None
74
90
 
91
+ for role, content_str in self._message_parts(messages):
92
+ if role == "system":
93
+ system_content = content_str
94
+ continue
95
+
96
+ if role in ["user", "assistant"]:
97
+ message_content = self._build_message_content(content_str)
98
+ if message_content:
99
+ claude_messages.append(self._create_claude_message(role, message_content))
100
+
101
+ return claude_messages, system_content
102
+
103
+ def _message_parts(self, messages):
75
104
  for msg in messages:
76
- if not isinstance(msg, list):
77
- msg = [msg]
78
- for part in msg:
79
- if isinstance(part, str):
80
- role = 'user'
81
- content_str = part
82
- elif isinstance(part, dict):
83
- role = part.get('role')
84
- content_str = str(part.get('content', ''))
85
- else:
86
- CustomUserWarning(f"Unsupported message part type: {type(part)}", raise_with=ValueError)
87
-
88
- if role == 'system':
89
- system_content = content_str
90
- continue
91
-
92
- if role in ['user', 'assistant']:
93
- message_content = []
94
-
95
- image_content = self._handle_image_content(content_str)
96
- message_content.extend(image_content)
97
-
98
- text_content = self._remove_vision_pattern(content_str)
99
- if text_content:
100
- message_content.append({
101
- "type": "text",
102
- "text": text_content
103
- })
104
-
105
- if message_content:
106
- if len(message_content) == 1 and message_content[0].get('type') == 'text':
107
- claude_messages.append({
108
- 'role': role,
109
- 'content': message_content[0]['text']
110
- })
111
- else:
112
- claude_messages.append({
113
- 'role': role,
114
- 'content': message_content
115
- })
105
+ msg_parts = msg if isinstance(msg, list) else [msg]
106
+ for part in msg_parts:
107
+ yield self._extract_message_details(part)
116
108
 
117
- if not claude_messages:
118
- return 0
109
+ def _extract_message_details(self, part):
110
+ if isinstance(part, str):
111
+ return "user", part
119
112
 
113
+ if isinstance(part, dict):
114
+ role = part.get("role")
115
+ content_str = str(part.get("content", ""))
116
+ return role, content_str
117
+
118
+ msg = f"Unsupported message part type: {type(part)}"
119
+ UserMessage(msg, raise_with=ValueError)
120
+ raise ValueError(msg)
121
+
122
+ def _build_message_content(self, content_str: str) -> list:
123
+ message_content = []
124
+
125
+ image_content = self._handle_image_content(content_str)
126
+ message_content.extend(image_content)
127
+
128
+ text_content = self._remove_vision_pattern(content_str)
129
+ if text_content:
130
+ message_content.append({"type": "text", "text": text_content})
131
+
132
+ return message_content
133
+
134
+ def _create_claude_message(self, role: str, message_content: list) -> dict:
135
+ if len(message_content) == 1 and message_content[0].get("type") == "text":
136
+ return {"role": role, "content": message_content[0]["text"]}
137
+
138
+ return {"role": role, "content": message_content}
139
+
140
+ def _count_claude_tokens(self, claude_messages: list, system_content: str | None) -> int:
120
141
  try:
121
- count_params = {
122
- 'model': self.model,
123
- 'messages': claude_messages
124
- }
142
+ count_params = {"model": self.model, "messages": claude_messages}
125
143
  if system_content:
126
- count_params['system'] = system_content
144
+ count_params["system"] = system_content
127
145
  count_response = self.client.messages.count_tokens(**count_params)
128
146
  return count_response.input_tokens
129
147
  except Exception as e:
130
- logging.error(f"Claude count_tokens failed: {e}")
131
- CustomUserWarning(f"Error counting tokens for Claude: {str(e)}", raise_with=RuntimeError)
148
+ UserMessage(f"Claude count_tokens failed: {e}")
149
+ UserMessage(f"Error counting tokens for Claude: {e!s}", raise_with=RuntimeError)
132
150
 
133
- def compute_remaining_tokens(self, prompts: list) -> int:
134
- raise NotImplementedError('Method not implemented.')
151
+ def compute_remaining_tokens(self, _prompts: list) -> int:
152
+ UserMessage("Method not implemented.", raise_with=NotImplementedError)
135
153
 
136
154
  def _handle_image_content(self, content: str) -> list:
137
155
  """Handle image content by processing vision patterns and returning image file data."""
156
+
138
157
  def extract_pattern(text):
139
- pattern = r'<<vision:(.*?):>>'
158
+ pattern = r"<<vision:(.*?):>>"
140
159
  return re.findall(pattern, text)
141
160
 
142
161
  image_files = []
143
- if '<<vision:' in content:
162
+ if "<<vision:" in content:
144
163
  parts = extract_pattern(content)
145
164
  for p in parts:
146
165
  img_ = p.strip()
@@ -148,191 +167,229 @@ class ClaudeXChatEngine(Engine, AnthropicMixin):
148
167
  max_used_frames = 10
149
168
  buffer, ext = encode_media_frames(img_)
150
169
  if len(buffer) > 1:
151
- step = len(buffer) // max_frames_spacing # max frames spacing
170
+ step = len(buffer) // max_frames_spacing # max frames spacing
152
171
  frames = []
153
172
  indices = list(range(0, len(buffer), step))[:max_used_frames]
154
173
  for i in indices:
155
- frames.append({'data': buffer[i], 'media_type': f'image/{ext}', 'type': 'base64'})
174
+ frames.append(
175
+ {"data": buffer[i], "media_type": f"image/{ext}", "type": "base64"}
176
+ )
156
177
  image_files.extend(frames)
157
178
  elif len(buffer) == 1:
158
- image_files.append({'data': buffer[0], 'media_type': f'image/{ext}', 'type': 'base64'})
179
+ image_files.append(
180
+ {"data": buffer[0], "media_type": f"image/{ext}", "type": "base64"}
181
+ )
159
182
  else:
160
- CustomUserWarning(f'No frames found for image!')
183
+ UserMessage("No frames found for image!")
161
184
  return image_files
162
185
 
163
186
  def _remove_vision_pattern(self, text: str) -> str:
164
187
  """Remove vision patterns from text."""
165
- pattern = r'<<vision:(.*?):>>'
166
- return re.sub(pattern, '', text)
188
+ pattern = r"<<vision:(.*?):>>"
189
+ return re.sub(pattern, "", text)
167
190
 
168
191
  def forward(self, argument):
169
192
  kwargs = argument.kwargs
170
193
  system, messages = argument.prop.prepared_input
171
194
  payload = self._prepare_request_payload(argument)
172
- except_remedy = kwargs.get('except_remedy')
195
+ except_remedy = kwargs.get("except_remedy")
173
196
 
174
197
  try:
175
- res = self.client.messages.create(
176
- system=system,
177
- messages=messages,
178
- **payload
179
- )
198
+ res = self.client.messages.create(system=system, messages=messages, **payload)
180
199
  except Exception as e:
181
- if anthropic.api_key is None or anthropic.api_key == '':
182
- msg = 'Anthropic API key is not set. Please set it in the config file or pass it as an argument to the command method.'
183
- logging.error(msg)
184
- if self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] is None or self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] == '':
185
- CustomUserWarning(msg, raise_with=ValueError)
186
- anthropic.api_key = self.config['NEUROSYMBOLIC_ENGINE_API_KEY']
200
+ if anthropic.api_key is None or anthropic.api_key == "":
201
+ msg = "Anthropic API key is not set. Please set it in the config file or pass it as an argument to the command method."
202
+ UserMessage(msg)
203
+ if (
204
+ self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] is None
205
+ or self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] == ""
206
+ ):
207
+ UserMessage(msg, raise_with=ValueError)
208
+ anthropic.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]
187
209
 
188
210
  callback = self.client.messages.create
189
- kwargs['model'] = kwargs['model'] if 'model' in kwargs else self.model
211
+ kwargs["model"] = kwargs.get("model", self.model)
190
212
 
191
213
  if except_remedy is not None:
192
214
  res = except_remedy(self, e, callback, argument)
193
215
  else:
194
- CustomUserWarning(f'Error during generation. Caused by: {e}', raise_with=ValueError)
216
+ UserMessage(f"Error during generation. Caused by: {e}", raise_with=ValueError)
195
217
 
196
- if payload['stream']:
197
- res = [_ for _ in res] # Unpack the iterator to a list
198
- metadata = {'raw_output': res}
218
+ if payload["stream"]:
219
+ res = list(res) # Unpack the iterator to a list
220
+ metadata = {"raw_output": res}
199
221
  response_data = self._collect_response(res)
200
222
 
201
- if response_data.get('function_call'):
202
- metadata['function_call'] = response_data['function_call']
223
+ if response_data.get("function_call"):
224
+ metadata["function_call"] = response_data["function_call"]
203
225
 
204
- text_output = response_data.get('text', '')
226
+ text_output = response_data.get("text", "")
205
227
  if argument.prop.response_format:
206
228
  # Anthropic returns JSON in markdown format
207
- text_output = text_output.replace('```json', '').replace('```', '')
229
+ text_output = text_output.replace("```json", "").replace("```", "")
208
230
 
209
231
  return [text_output], metadata
210
232
 
211
233
  def _prepare_raw_input(self, argument):
212
234
  if not argument.prop.processed_input:
213
- raise ValueError('Need to provide a prompt instruction to the engine if `raw_input` is enabled!')
235
+ msg = "Need to provide a prompt instruction to the engine if `raw_input` is enabled!"
236
+ UserMessage(msg)
237
+ raise ValueError(msg)
214
238
  system = NOT_GIVEN
215
239
  prompt = copy(argument.prop.processed_input)
216
- if type(prompt) != list:
217
- if type(prompt) != dict:
218
- prompt = {'role': 'user', 'content': str(prompt)}
240
+ if not isinstance(prompt, list):
241
+ if not isinstance(prompt, dict):
242
+ prompt = {"role": "user", "content": str(prompt)}
219
243
  prompt = [prompt]
220
244
  if len(prompt) > 1:
221
245
  # assert there are not more than 1 system instruction
222
- assert len([p for p in prompt if p['role'] == 'system']) <= 1, 'Only one system instruction is allowed!'
246
+ assert len([p for p in prompt if p["role"] == "system"]) <= 1, (
247
+ "Only one system instruction is allowed!"
248
+ )
223
249
  for p in prompt:
224
- if p['role'] == 'system':
225
- system = p['content']
250
+ if p["role"] == "system":
251
+ system = p["content"]
226
252
  prompt.remove(p)
227
253
  break
228
254
  return system, prompt
229
255
 
230
256
  def prepare(self, argument):
231
- #@NOTE: OpenAI compatibility at high level
257
+ # @NOTE: OpenAI compatibility at high level
232
258
  if argument.prop.raw_input:
233
259
  argument.prop.prepared_input = self._prepare_raw_input(argument)
234
260
  return
235
261
 
236
- _non_verbose_output = """<META_INSTRUCTION/>\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n\n"""
237
- user: str = ""
238
- system: str = ""
262
+ non_verbose_output = """<META_INSTRUCTION/>\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n\n"""
263
+ image_files = self._handle_image_content(str(argument.prop.processed_input))
264
+ has_image = len(image_files) > 0
265
+
266
+ system = self._build_system_prompt(argument, has_image, non_verbose_output)
267
+ user_text, user_prompt, image_blocks = self._build_user_prompt(argument, image_files)
268
+ system, user_prompt = self._apply_self_prompt_if_needed(
269
+ argument, system, user_text, image_blocks, user_prompt
270
+ )
239
271
 
272
+ argument.prop.prepared_input = (system, [user_prompt])
273
+
274
+ def _build_system_prompt(self, argument, has_image: bool, non_verbose_output: str) -> str:
275
+ system = self._build_system_prefix(argument, non_verbose_output)
276
+ system = self._append_context_sections(system, argument)
277
+ system = self._append_instruction_section(system, argument, has_image)
278
+ return self._append_template_suffix(system, argument)
279
+
280
+ def _build_system_prefix(self, argument, non_verbose_output: str) -> str:
281
+ system = ""
240
282
  if argument.prop.suppress_verbose_output:
241
- system += _non_verbose_output
242
- system = f'{system}\n' if system and len(system) > 0 else ''
283
+ system += non_verbose_output
284
+
285
+ system = f"{system}\n" if system and len(system) > 0 else ""
243
286
 
244
287
  if argument.prop.response_format:
245
- _rsp_fmt = argument.prop.response_format
246
- assert _rsp_fmt.get('type') is not None, 'Response format type is required! Expected format `{"type": str}`! The str value will be passed to the engine. Refer to the Anthropic documentation for more information: https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency#example-standardizing-customer-feedback'
247
- system += _non_verbose_output
248
- system += f'<RESPONSE_FORMAT/>\n{_rsp_fmt["type"]}\n\n'
288
+ response_format = argument.prop.response_format
289
+ assert response_format.get("type") is not None, (
290
+ 'Response format type is required! Expected format `{"type": str}`! The str value will be passed to the engine. Refer to the Anthropic documentation for more information: https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency#example-standardizing-customer-feedback'
291
+ )
292
+ system += non_verbose_output
293
+ system += f"<RESPONSE_FORMAT/>\n{response_format['type']}\n\n"
294
+
295
+ return system
249
296
 
297
+ def _append_context_sections(self, system: str, argument) -> str:
250
298
  ref = argument.prop.instance
251
- static_ctxt, dyn_ctxt = ref.global_context
252
- if len(static_ctxt) > 0:
253
- system += f"<STATIC_CONTEXT/>\n{static_ctxt}\n\n"
299
+ static_context, dynamic_context = ref.global_context
300
+
301
+ if len(static_context) > 0:
302
+ system += f"<STATIC_CONTEXT/>\n{static_context}\n\n"
254
303
 
255
- if len(dyn_ctxt) > 0:
256
- system += f"<DYNAMIC_CONTEXT/>\n{dyn_ctxt}\n\n"
304
+ if len(dynamic_context) > 0:
305
+ system += f"<DYNAMIC_CONTEXT/>\n{dynamic_context}\n\n"
257
306
 
258
307
  payload = argument.prop.payload
259
308
  if argument.prop.payload:
260
- system += f"<ADDITIONAL_CONTEXT/>\n{str(payload)}\n\n"
309
+ system += f"<ADDITIONAL_CONTEXT/>\n{payload!s}\n\n"
261
310
 
262
- examples: List[str] = argument.prop.examples
311
+ examples: list[str] = argument.prop.examples
263
312
  if examples and len(examples) > 0:
264
- system += f"<EXAMPLES/>\n{str(examples)}\n\n"
313
+ system += f"<EXAMPLES/>\n{examples!s}\n\n"
265
314
 
266
- image_files = self._handle_image_content(str(argument.prop.processed_input))
315
+ return system
267
316
 
317
+ def _append_instruction_section(self, system: str, argument, has_image: bool) -> str:
268
318
  if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
269
- val = str(argument.prop.prompt)
270
- if len(image_files) > 0:
271
- val = self._remove_vision_pattern(val)
272
- system += f"<INSTRUCTION/>\n{val}\n\n"
319
+ instruction_value = str(argument.prop.prompt)
320
+ if has_image:
321
+ instruction_value = self._remove_vision_pattern(instruction_value)
322
+ system += f"<INSTRUCTION/>\n{instruction_value}\n\n"
323
+
324
+ return system
325
+
326
+ def _append_template_suffix(self, system: str, argument) -> str:
327
+ if argument.prop.template_suffix:
328
+ system += f" You will only generate content for the placeholder `{argument.prop.template_suffix!s}` following the instructions and the provided context information.\n\n"
273
329
 
274
- suffix: str = str(argument.prop.processed_input)
330
+ return system
331
+
332
+ def _build_user_prompt(self, argument, image_files):
333
+ suffix = str(argument.prop.processed_input)
275
334
  if len(image_files) > 0:
276
335
  suffix = self._remove_vision_pattern(suffix)
277
336
 
278
- user += f"{suffix}"
337
+ user_text = f"{suffix}"
338
+ if not user_text:
339
+ user_text = "N/A"
279
340
 
280
- if not len(user):
281
- # Anthropic doesn't allow empty user prompts; force it
282
- user = "N/A"
341
+ image_blocks = [{"type": "image", "source": image_file} for image_file in image_files]
342
+ user_prompt = self._wrap_user_prompt_content(user_text, image_blocks)
343
+ return user_text, user_prompt, image_blocks
283
344
 
284
- if argument.prop.template_suffix:
285
- system += f' You will only generate content for the placeholder `{str(argument.prop.template_suffix)}` following the instructions and the provided context information.\n\n'
345
+ def _wrap_user_prompt_content(self, user_text: str, image_blocks: list[dict]) -> dict:
346
+ if len(image_blocks) > 0:
347
+ return {"role": "user", "content": [*image_blocks, {"type": "text", "text": user_text}]}
286
348
 
287
- if len(image_files) > 0:
288
- images = [{ 'type': 'image', "source": im } for im in image_files]
289
- user_prompt = { "role": "user", "content": [
290
- *images,
291
- { 'type': 'text', 'text': user }
292
- ]}
293
- else:
294
- user_prompt = { "role": "user", "content": user }
295
-
296
- # First check if the `Symbol` instance has the flag set, otherwise check if it was passed as an argument to a method
297
- if argument.prop.instance._kwargs.get('self_prompt', False) or argument.prop.self_prompt:
298
- self_prompter = SelfPrompt()
299
-
300
- res = self_prompter({'user': user, 'system': system})
301
- if res is None:
302
- raise ValueError("Self-prompting failed!")
303
-
304
- if len(image_files) > 0:
305
- user_prompt = { "role": "user", "content": [
306
- *images,
307
- { 'type': 'text', 'text': res['user'] }
308
- ]}
309
- else:
310
- user_prompt = { "role": "user", "content": res['user'] }
349
+ return {"role": "user", "content": user_text}
311
350
 
312
- system = res['system']
351
+ def _apply_self_prompt_if_needed(
352
+ self, argument, system: str, user_text: str, image_blocks: list[dict], user_prompt: dict
353
+ ):
354
+ if not (
355
+ argument.prop.instance._kwargs.get("self_prompt", False) or argument.prop.self_prompt
356
+ ):
357
+ return system, user_prompt
313
358
 
314
- argument.prop.prepared_input = (system, [user_prompt])
359
+ self_prompter = SelfPrompt()
360
+ res = self_prompter({"user": user_text, "system": system})
361
+ if res is None:
362
+ msg = "Self-prompting failed!"
363
+ UserMessage(msg)
364
+ raise ValueError(msg)
365
+
366
+ updated_user_prompt = self._wrap_user_prompt_content(res["user"], image_blocks)
367
+ return res["system"], updated_user_prompt
315
368
 
316
369
  def _prepare_request_payload(self, argument):
317
370
  kwargs = argument.kwargs
318
- model = kwargs.get('model', self.model)
319
- max_tokens = kwargs.get('max_tokens', self.max_response_tokens)
320
- stop = kwargs.get('stop', NOT_GIVEN)
321
- temperature = kwargs.get('temperature', 1)
322
- top_p = kwargs.get('top_p', NOT_GIVEN if temperature is not None else 1) #@NOTE:'You should either alter temperature or top_p, but not both.'
323
- top_k = kwargs.get('top_k', NOT_GIVEN)
324
- stream = kwargs.get('stream', True) # Do NOT remove this default value! Getting tons of API errors because they can't process requests >10m
325
- tools = kwargs.get('tools', NOT_GIVEN)
326
- tool_choice = kwargs.get('tool_choice', NOT_GIVEN)
327
- metadata_anthropic = kwargs.get('metadata', NOT_GIVEN)
328
-
329
- if stop != NOT_GIVEN and type(stop) != list:
371
+ model = kwargs.get("model", self.model)
372
+ max_tokens = kwargs.get("max_tokens", self.max_response_tokens)
373
+ stop = kwargs.get("stop", NOT_GIVEN)
374
+ temperature = kwargs.get("temperature", 1)
375
+ top_p = kwargs.get(
376
+ "top_p", NOT_GIVEN if temperature is not None else 1
377
+ ) # @NOTE:'You should either alter temperature or top_p, but not both.'
378
+ top_k = kwargs.get("top_k", NOT_GIVEN)
379
+ stream = kwargs.get(
380
+ "stream", True
381
+ ) # Do NOT remove this default value! Getting tons of API errors because they can't process requests >10m
382
+ tools = kwargs.get("tools", NOT_GIVEN)
383
+ tool_choice = kwargs.get("tool_choice", NOT_GIVEN)
384
+ metadata_anthropic = kwargs.get("metadata", NOT_GIVEN)
385
+
386
+ if stop != NOT_GIVEN and not isinstance(stop, list):
330
387
  stop = [stop]
331
388
 
332
- #@NOTE: Anthropic fails if stop is not raw string, so cast it to r'…'
389
+ # @NOTE: Anthropic fails if stop is not raw string, so cast it to r'…'
333
390
  # E.g. when we use defaults in core.py, i.e. stop=['\n']
334
391
  if stop != NOT_GIVEN:
335
- stop = [r'{s}' for s in stop]
392
+ stop = [r"{s}" for s in stop]
336
393
 
337
394
  return {
338
395
  "model": model,
@@ -344,74 +401,99 @@ class ClaudeXChatEngine(Engine, AnthropicMixin):
344
401
  "stream": stream,
345
402
  "metadata": metadata_anthropic,
346
403
  "tools": tools,
347
- "tool_choice": tool_choice
404
+ "tool_choice": tool_choice,
348
405
  }
349
406
 
350
407
def _collect_response(self, res):
    """Normalize an Anthropic response into a ``text`` / ``function_call`` dict.

    A ``list`` is handed to ``_collect_streaming_response``; a ``Message`` is
    handed to ``_collect_message_response``. Any other type is reported via
    ``UserMessage`` with ``raise_with=ValueError``.
    """
    if isinstance(res, list):
        return self._collect_streaming_response(res)

    if not isinstance(res, Message):
        UserMessage(
            f"Unexpected response type from Anthropic API: {type(res)}", raise_with=ValueError
        )
        # Fallback value in case the call above returns instead of raising.
        return {}

    return self._collect_message_response(res)
418
+
419
def _collect_streaming_response(self, res):
    """Fold a list of raw stream events into text plus an optional function call.

    Start events register tool calls, delta events extend either the text or
    a registered tool call's JSON buffer, and stop events finalize tool calls.
    Events of any other type are ignored.

    Returns:
        ``{"text": <joined text>, "function_call": <dict or None>}``.
    """
    fragments = []
    finished_calls = []
    pending_calls = {}

    for event in res:
        if isinstance(event, RawContentBlockStartEvent):
            self._start_tool_call(event, pending_calls)
            continue
        if isinstance(event, RawContentBlockDeltaEvent):
            self._update_stream_chunk(event, fragments, pending_calls)
            continue
        if not isinstance(event, RawContentBlockStopEvent):
            continue

        finished = self._finish_tool_call(event, pending_calls)
        if finished is not None:
            finished_calls.append(finished)

    return {
        "text": "".join(fragments),
        "function_call": self._build_function_call_data(finished_calls),
    }
438
+
439
def _start_tool_call(self, chunk, active_tool_calls: dict):
    """Register a new tool call under ``chunk.index`` when the block is a tool use.

    Content blocks that are not ``ToolUseBlock`` instances are ignored. The
    registered entry starts with an empty ``input_json_str`` buffer.
    """
    block = chunk.content_block
    if not isinstance(block, ToolUseBlock):
        return

    entry = {
        "id": block.id,
        "name": block.name,
        "input_json_str": "",
    }
    active_tool_calls[chunk.index] = entry
416
446
 
417
- CustomUserWarning(f"Unexpected response type from Anthropic API: {type(res)}", raise_with=ValueError)
447
def _update_stream_chunk(self, chunk, text_parts: list, active_tool_calls: dict):
    """Apply one delta event to the accumulated text or a tool call's JSON buffer.

    ``TextDelta`` payloads are appended to ``text_parts``. ``InputJSONDelta``
    payloads are appended to the ``input_json_str`` of the tool call stored
    under ``chunk.index``; they are dropped when no such call is registered.
    """
    delta = chunk.delta
    if isinstance(delta, TextDelta):
        text_parts.append(delta.text)
        return

    if not isinstance(delta, InputJSONDelta):
        return

    if chunk.index in active_tool_calls:
        entry = active_tool_calls[chunk.index]
        entry["input_json_str"] += delta.partial_json
452
+
453
+ def _finish_tool_call(self, chunk, active_tool_calls: dict):
454
+ if chunk.index not in active_tool_calls:
455
+ return None
456
+
457
+ tool_call_info = active_tool_calls.pop(chunk.index)
458
+ try:
459
+ tool_call_info["input"] = json.loads(tool_call_info["input_json_str"])
460
+ except json.JSONDecodeError as e:
461
+ UserMessage(
462
+ f"Failed to parse JSON for tool call {tool_call_info['name']}: {e}. Raw JSON: '{tool_call_info['input_json_str']}'"
463
+ )
464
+ tool_call_info["input"] = {}
465
+ return tool_call_info
466
+
467
+ def _build_function_call_data(self, tool_calls_raw: list | None) -> dict | None:
468
+ if not tool_calls_raw:
469
+ return None
470
+
471
+ if len(tool_calls_raw) > 1:
472
+ UserMessage(
473
+ "Multiple tool calls detected in the stream but only the first one will be processed."
474
+ )
475
+
476
+ tool_call = tool_calls_raw[0]
477
+ return {"name": tool_call["name"], "arguments": tool_call["input"]}
478
+
479
def _collect_message_response(self, res: Message):
    """Extract text and the first tool use from a non-streamed ``Message``.

    Text from every ``TextBlock`` in ``res.content`` is concatenated in order.
    The first ``ToolUseBlock`` becomes the function call; each further one
    only triggers a ``UserMessage`` notice. Other block types are ignored.

    Returns:
        ``{"text": <joined text>, "function_call": <dict or None>}``.
    """
    fragments = []
    function_call_data = None

    for block in res.content:
        if isinstance(block, TextBlock):
            fragments.append(block.text)
            continue
        if not isinstance(block, ToolUseBlock):
            continue

        # A populated payload means a tool use block was already consumed.
        if function_call_data is not None:
            UserMessage(
                "Multiple tool use blocks detected in the response but only the first one will be processed."
            )
            continue

        function_call_data = {
            "name": block.name,
            "arguments": block.input,
        }

    return {"text": "".join(fragments), "function_call": function_call_data}