symbolicai 0.21.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +269 -173
- symai/backend/base.py +123 -110
- symai/backend/engines/drawing/engine_bfl.py +45 -44
- symai/backend/engines/drawing/engine_gpt_image.py +112 -97
- symai/backend/engines/embedding/engine_llama_cpp.py +63 -52
- symai/backend/engines/embedding/engine_openai.py +25 -21
- symai/backend/engines/execute/engine_python.py +19 -18
- symai/backend/engines/files/engine_io.py +104 -95
- symai/backend/engines/imagecaptioning/engine_blip2.py +28 -24
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +102 -79
- symai/backend/engines/index/engine_pinecone.py +124 -97
- symai/backend/engines/index/engine_qdrant.py +1011 -0
- symai/backend/engines/index/engine_vectordb.py +84 -56
- symai/backend/engines/lean/engine_lean4.py +96 -52
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +330 -248
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +329 -264
- symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +118 -88
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +344 -299
- symai/backend/engines/neurosymbolic/engine_groq.py +173 -115
- symai/backend/engines/neurosymbolic/engine_huggingface.py +114 -84
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +144 -118
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +415 -307
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +394 -231
- symai/backend/engines/ocr/engine_apilayer.py +23 -27
- symai/backend/engines/output/engine_stdout.py +10 -13
- symai/backend/engines/{webscraping → scrape}/engine_requests.py +101 -54
- symai/backend/engines/search/engine_openai.py +100 -88
- symai/backend/engines/search/engine_parallel.py +665 -0
- symai/backend/engines/search/engine_perplexity.py +44 -45
- symai/backend/engines/search/engine_serpapi.py +37 -34
- symai/backend/engines/speech_to_text/engine_local_whisper.py +54 -51
- symai/backend/engines/symbolic/engine_wolframalpha.py +15 -9
- symai/backend/engines/text_to_speech/engine_openai.py +20 -26
- symai/backend/engines/text_vision/engine_clip.py +39 -37
- symai/backend/engines/userinput/engine_console.py +5 -6
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +48 -38
- symai/backend/mixin/deepseek.py +6 -5
- symai/backend/mixin/google.py +7 -4
- symai/backend/mixin/groq.py +2 -4
- symai/backend/mixin/openai.py +140 -110
- symai/backend/settings.py +87 -20
- symai/chat.py +216 -123
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +80 -70
- symai/collect/pipeline.py +67 -51
- symai/collect/stats.py +161 -109
- symai/components.py +707 -360
- symai/constraints.py +24 -12
- symai/core.py +1857 -1233
- symai/core_ext.py +83 -80
- symai/endpoints/api.py +166 -104
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +29 -21
- symai/extended/arxiv_pdf_parser.py +23 -14
- symai/extended/bibtex_parser.py +9 -6
- symai/extended/conversation.py +156 -126
- symai/extended/document.py +50 -30
- symai/extended/file_merger.py +57 -14
- symai/extended/graph.py +51 -32
- symai/extended/html_style_template.py +18 -14
- symai/extended/interfaces/blip_2.py +2 -3
- symai/extended/interfaces/clip.py +4 -3
- symai/extended/interfaces/console.py +9 -1
- symai/extended/interfaces/dall_e.py +4 -2
- symai/extended/interfaces/file.py +2 -0
- symai/extended/interfaces/flux.py +4 -2
- symai/extended/interfaces/gpt_image.py +16 -7
- symai/extended/interfaces/input.py +2 -1
- symai/extended/interfaces/llava.py +1 -2
- symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +4 -3
- symai/extended/interfaces/naive_vectordb.py +9 -10
- symai/extended/interfaces/ocr.py +5 -3
- symai/extended/interfaces/openai_search.py +2 -0
- symai/extended/interfaces/parallel.py +30 -0
- symai/extended/interfaces/perplexity.py +2 -0
- symai/extended/interfaces/pinecone.py +12 -9
- symai/extended/interfaces/python.py +2 -0
- symai/extended/interfaces/serpapi.py +3 -1
- symai/extended/interfaces/terminal.py +2 -4
- symai/extended/interfaces/tts.py +3 -2
- symai/extended/interfaces/whisper.py +3 -2
- symai/extended/interfaces/wolframalpha.py +2 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +14 -13
- symai/extended/os_command.py +39 -29
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +51 -43
- symai/extended/packages/sympkg.py +41 -35
- symai/extended/packages/symrun.py +63 -50
- symai/extended/repo_cloner.py +14 -12
- symai/extended/seo_query_optimizer.py +15 -13
- symai/extended/solver.py +116 -91
- symai/extended/summarizer.py +12 -10
- symai/extended/taypan_interpreter.py +17 -18
- symai/extended/vectordb.py +122 -92
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +51 -47
- symai/formatter/regex.py +70 -69
- symai/functional.py +325 -176
- symai/imports.py +190 -147
- symai/interfaces.py +57 -28
- symai/memory.py +45 -35
- symai/menu/screen.py +28 -19
- symai/misc/console.py +66 -56
- symai/misc/loader.py +8 -5
- symai/models/__init__.py +17 -1
- symai/models/base.py +395 -236
- symai/models/errors.py +1 -2
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +24 -25
- symai/ops/primitives.py +1149 -731
- symai/post_processors.py +58 -50
- symai/pre_processors.py +86 -82
- symai/processor.py +21 -13
- symai/prompts.py +764 -685
- symai/server/huggingface_server.py +135 -49
- symai/server/llama_cpp_server.py +21 -11
- symai/server/qdrant_server.py +206 -0
- symai/shell.py +100 -42
- symai/shellsv.py +700 -492
- symai/strategy.py +630 -346
- symai/symbol.py +368 -322
- symai/utils.py +100 -78
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +22 -10
- symbolicai-1.1.0.dist-info/RECORD +168 -0
- symbolicai-0.21.0.dist-info/RECORD +0 -162
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py

@@ -1,9 +1,7 @@
 import base64
-import io
 import logging
 import mimetypes
 import re
-import urllib.parse
 from copy import deepcopy
 from pathlib import Path
 
@@ -12,9 +10,7 @@ from google import genai
 from google.genai import types
 
 from ....components import SelfPrompt
-from ....
-from ....symbol import Symbol
-from ....utils import CustomUserWarning, encode_media_frames
+from ....utils import UserMessage, encode_media_frames
 from ...base import Engine
 from ...mixin.google import GoogleMixin
 from ...settings import SYMAI_CONFIG
@@ -34,19 +30,20 @@ class TokenizerWrapper:
     def encode(self, text: str) -> int:
         return self.compute_tokens_func([{"role": "user", "content": text}])
 
+
 class GeminiXReasoningEngine(Engine, GoogleMixin):
     def __init__(self, api_key: str | None = None, model: str | None = None):
         super().__init__()
         self.config = deepcopy(SYMAI_CONFIG)
         # In case we use EngineRepository.register to inject the api_key and model => dynamically change the engine at runtime
         if api_key is not None and model is not None:
-            self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] = api_key
-            self.config['NEUROSYMBOLIC_ENGINE_MODEL'] = model
-        if self.id() != 'neurosymbolic':
-            return
+            self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] = api_key
+            self.config["NEUROSYMBOLIC_ENGINE_MODEL"] = model
+        if self.id() != "neurosymbolic":
+            return # do not initialize if not neurosymbolic; avoids conflict with llama.cpp check in EngineRepository.register_from_package
 
-        self.api_key = self.config['NEUROSYMBOLIC_ENGINE_API_KEY']
-        self.model = self.config['NEUROSYMBOLIC_ENGINE_MODEL']
+        self.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]
+        self.model = self.config["NEUROSYMBOLIC_ENGINE_MODEL"]
         self.name = self.__class__.__name__
         self.tokenizer = TokenizerWrapper(self.compute_required_tokens)
         self.max_context_tokens = self.api_max_context_tokens()
@@ -54,33 +51,32 @@ class GeminiXReasoningEngine(Engine, GoogleMixin):
         self.client = genai.Client(api_key=self.api_key)
 
     def id(self) -> str:
-        model = self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')
-        if model and model.startswith('gemini'):
-            return 'neurosymbolic'
-        return super().id()
+        model = self.config.get("NEUROSYMBOLIC_ENGINE_MODEL")
+        if model and model.startswith("gemini"):
+            return "neurosymbolic"
+        return super().id() # default to unregistered
 
     def command(self, *args, **kwargs):
         super().command(*args, **kwargs)
-        if 'NEUROSYMBOLIC_ENGINE_API_KEY' in kwargs:
-            self.api_key = kwargs['NEUROSYMBOLIC_ENGINE_API_KEY']
+        if "NEUROSYMBOLIC_ENGINE_API_KEY" in kwargs:
+            self.api_key = kwargs["NEUROSYMBOLIC_ENGINE_API_KEY"]
             genai.configure(api_key=self.api_key)
-        if 'NEUROSYMBOLIC_ENGINE_MODEL' in kwargs:
-            self.model = kwargs['NEUROSYMBOLIC_ENGINE_MODEL']
+        if "NEUROSYMBOLIC_ENGINE_MODEL" in kwargs:
+            self.model = kwargs["NEUROSYMBOLIC_ENGINE_MODEL"]
             self.client = genai.GenerativeModel(model_name=self.model)
 
     def compute_required_tokens(self, messages) -> int:
         api_contents: list[types.Content] = []
 
         for msg in messages:
-            if
-
-            for part in msg:
+            msg_parts = msg if isinstance(msg, list) else [msg]
+            for part in msg_parts:
                 if isinstance(part, str):
-                    role = 'user'
+                    role = "user"
                     content_str = part
                 elif isinstance(part, dict):
-                    role = part.get('role')
-                    content_str = str(part.get('content', ''))
+                    role = part.get("role")
+                    content_str = str(part.get("content", ""))
                 current_message_api_parts: list[types.Part] = []
                 image_api_parts = self._handle_image_content(content_str)
                 current_message_api_parts.extend(image_api_parts)
@@ -96,168 +92,180 @@ class GeminiXReasoningEngine(Engine, GoogleMixin):
             return 0
 
         try:
-            count_response = self.client.models.count_tokens(model=self.model, contents=api_contents)
+            count_response = self.client.models.count_tokens(
+                model=self.model, contents=api_contents
+            )
             return count_response.total_tokens
         except Exception as e:
-
-
+            UserMessage(f"Gemini count_tokens failed: {e}")
+            UserMessage(f"Error counting tokens for Gemini: {e!s}", raise_with=RuntimeError)
 
-    def compute_remaining_tokens(self,
-
+    def compute_remaining_tokens(self, _prompts: list) -> int:
+        UserMessage("Token counting not implemented for Gemini", raise_with=NotImplementedError)
 
     def _handle_document_content(self, content: str):
         """Handle document content by uploading to Gemini"""
         try:
-            pattern = r'<<document:(.*?):>>'
+            pattern = r"<<document:(.*?):>>"
            matches = re.findall(pattern, content)
             if not matches:
                 return None
 
             doc_path = matches[0].strip()
-            if doc_path.startswith('http'):
-
+            if doc_path.startswith("http"):
+                UserMessage("URL documents not yet supported for Gemini")
                 return None
-
-            uploaded_file = genai.upload_file(doc_path)
-            return uploaded_file
+            return genai.upload_file(doc_path)
         except Exception as e:
-
+            UserMessage(f"Failed to process document: {e}")
             return None
 
-    def _handle_image_content(self, content: str) -> list:
+    def _handle_image_content(self, content: str) -> list[types.Part]:
         """Handle image content by processing and preparing google.generativeai.types.Part objects."""
-        image_parts = []
-
-        matches = re.findall(pattern, content) # re must be imported
-
-        for match in matches:
-            img_src = match.strip()
-
+        image_parts: list[types.Part] = []
+        for img_src in self._extract_image_sources(content):
             try:
-
-                    header, encoded = img_src.split(',', 1)
-                    mime_type = header.split(';')[0].split(':')[1]
-                    image_bytes = base64.b64decode(encoded)
-                    image_parts.append(genai.types.Part(inline_data=genai.types.Blob(mime_type=mime_type, data=image_bytes)))
-
-                elif img_src.startswith('http://') or img_src.startswith('https://'):
-                    response = requests.get(img_src, timeout=10) # 10 seconds timeout
-                    response.raise_for_status()
-
-                    image_bytes = response.content
-                    mime_type = response.headers.get('Content-Type', 'application/octet-stream')
-
-                    if not mime_type.startswith('image/'):
-                        CustomUserWarning(f"URL content type '{mime_type}' does not appear to be an image for: {img_src}. Attempting to use anyway.")
-
-                    image_parts.append(genai.types.Part(inline_data=genai.types.Blob(mime_type=mime_type, data=image_bytes)))
-
-                elif img_src.startswith('frames:'):
-                    temp_path = img_src.replace('frames:', '')
-                    parts = temp_path.split(':', 1)
-                    if len(parts) != 2:
-                        CustomUserWarning(f"Invalid 'frames:' format: {img_src}")
-                        continue
-                    max_used_frames_str, actual_path = parts
-                    try:
-                        max_used_frames = int(max_used_frames_str)
-                    except ValueError:
-                        CustomUserWarning(f"Invalid max_frames number in 'frames:' format: {img_src}")
-                        continue
-
-                    frame_buffers, ext = encode_media_frames(actual_path)
-
-                    mime_type = f'image/{ext.lower()}' if ext else 'application/octet-stream'
-                    if ext and ext.lower() == 'jpg':
-                        mime_type = 'image/jpeg'
-
-                    if not frame_buffers:
-                        CustomUserWarning(f"encode_media_frames returned no frames for: {actual_path}")
-                        continue
-
-                    step = max(1, len(frame_buffers) // 50)
-                    indices = list(range(0, len(frame_buffers), step))[:max_used_frames]
-
-                    for i_idx in indices:
-                        if i_idx < len(frame_buffers):
-                            image_bytes = frame_buffers[i_idx]
-                            image_parts.append(genai.types.Part(inline_data=genai.types.Blob(mime_type=mime_type, data=image_bytes)))
-
-                else:
-                    # Handle local file paths
-                    local_file_path = Path(img_src)
-                    if not local_file_path.is_file():
-                        CustomUserWarning(f"Local image file not found: {img_src}")
-                        continue
-
-                    image_bytes = local_file_path.read_bytes()
-                    mime_type, _ = mimetypes.guess_type(local_file_path)
-                    if mime_type is None: # Fallback MIME type determination
-                        file_ext = local_file_path.suffix.lower().lstrip('.')
-                        if file_ext == 'jpg': mime_type = 'image/jpeg'
-                        elif file_ext == 'png': mime_type = 'image/png'
-                        elif file_ext == 'gif': mime_type = 'image/gif'
-                        elif file_ext == 'webp': mime_type = 'image/webp'
-                        else: mime_type = 'application/octet-stream'
-
-                    image_parts.append(genai.types.Part(inline_data=genai.types.Blob(mime_type=mime_type, data=image_bytes)))
-
+                image_parts.extend(self._create_parts_from_image_source(img_src))
             except Exception as e:
-
-
+                UserMessage(
+                    f"Failed to process image source '{img_src}'. Error: {e!s}",
+                    raise_with=ValueError,
+                )
         return image_parts
 
+    def _extract_image_sources(self, content: str) -> list[str]:
+        pattern = r"<<vision:(.*?):>>"
+        return [match.strip() for match in re.findall(pattern, content)]
+
+    def _create_parts_from_image_source(self, img_src: str) -> list[types.Part]:
+        if img_src.startswith("data:image"):
+            return self._create_parts_from_data_uri(img_src)
+        if img_src.startswith(("http://", "https://")):
+            return self._create_parts_from_url(img_src)
+        if img_src.startswith("frames:"):
+            return self._create_parts_from_frames(img_src)
+        return self._create_parts_from_local_path(img_src)
+
+    def _create_parts_from_data_uri(self, img_src: str) -> list[types.Part]:
+        header, encoded = img_src.split(",", 1)
+        mime_type = header.split(";")[0].split(":")[1]
+        image_bytes = base64.b64decode(encoded)
+        part = genai.types.Part(inline_data=genai.types.Blob(mime_type=mime_type, data=image_bytes))
+        return [part]
+
+    def _create_parts_from_url(self, img_src: str) -> list[types.Part]:
+        response = requests.get(img_src, timeout=10)
+        response.raise_for_status()
+        image_bytes = response.content
+        mime_type = response.headers.get("Content-Type", "application/octet-stream")
+        if not mime_type.startswith("image/"):
+            UserMessage(
+                f"URL content type '{mime_type}' does not appear to be an image for: {img_src}. Attempting to use anyway."
+            )
+        part = genai.types.Part(inline_data=genai.types.Blob(mime_type=mime_type, data=image_bytes))
+        return [part]
+
+    def _create_parts_from_frames(self, img_src: str) -> list[types.Part]:
+        temp_path = img_src.replace("frames:", "")
+        parts = temp_path.split(":", 1)
+        if len(parts) != 2:
+            UserMessage(f"Invalid 'frames:' format: {img_src}")
+            return []
+        max_used_frames_str, actual_path = parts
+        try:
+            max_used_frames = int(max_used_frames_str)
+        except ValueError:
+            UserMessage(f"Invalid max_frames number in 'frames:' format: {img_src}")
+            return []
+        frame_buffers, ext = encode_media_frames(actual_path)
+        mime_type = f"image/{ext.lower()}" if ext else "application/octet-stream"
+        if ext and ext.lower() == "jpg":
+            mime_type = "image/jpeg"
+        if not frame_buffers:
+            UserMessage(f"encode_media_frames returned no frames for: {actual_path}")
+            return []
+        step = max(1, len(frame_buffers) // 50)
+        indices = list(range(0, len(frame_buffers), step))[:max_used_frames]
+        parts_list: list[types.Part] = []
+        for frame_idx in indices:
+            if frame_idx < len(frame_buffers):
+                image_bytes = frame_buffers[frame_idx]
+                parts_list.append(
+                    genai.types.Part(
+                        inline_data=genai.types.Blob(mime_type=mime_type, data=image_bytes)
+                    )
+                )
+        return parts_list
+
+    def _create_parts_from_local_path(self, img_src: str) -> list[types.Part]:
+        local_file_path = Path(img_src)
+        if not local_file_path.is_file():
+            UserMessage(f"Local image file not found: {img_src}")
+            return []
+        image_bytes = local_file_path.read_bytes()
+        mime_type, _ = mimetypes.guess_type(local_file_path)
+        if mime_type is None:
+            file_ext = local_file_path.suffix.lower().lstrip(".")
+            if file_ext == "jpg":
+                mime_type = "image/jpeg"
+            elif file_ext == "png":
+                mime_type = "image/png"
+            elif file_ext == "gif":
+                mime_type = "image/gif"
+            elif file_ext == "webp":
+                mime_type = "image/webp"
+            else:
+                mime_type = "application/octet-stream"
+        part = genai.types.Part(inline_data=genai.types.Blob(mime_type=mime_type, data=image_bytes))
+        return [part]
+
     def _handle_video_content(self, content: str):
         """Handle video content by uploading to Gemini"""
         try:
-            pattern = r'<<video:(.*?):>>'
+            pattern = r"<<video:(.*?):>>"
             matches = re.findall(pattern, content)
             if not matches:
                 return None
 
             video_path = matches[0].strip()
-            if video_path.startswith('http'):
-
+            if video_path.startswith("http"):
+                UserMessage("URL videos not yet supported for Gemini")
                 return None
-
-
-            uploaded_file = genai.upload_file(video_path)
-            return uploaded_file
+            # Upload local video
+            return genai.upload_file(video_path)
         except Exception as e:
-
+            UserMessage(f"Failed to process video: {e}")
             return None
 
     def _handle_audio_content(self, content: str):
         """Handle audio content by uploading to Gemini"""
         try:
-            pattern = r'<<audio:(.*?):>>'
+            pattern = r"<<audio:(.*?):>>"
             matches = re.findall(pattern, content)
             if not matches:
                 return None
 
             audio_path = matches[0].strip()
-            if audio_path.startswith('http'):
-
+            if audio_path.startswith("http"):
+                UserMessage("URL audio not yet supported for Gemini")
                 return None
-
-
-            uploaded_file = genai.upload_file(audio_path)
-            return uploaded_file
+            # Upload local audio
+            return genai.upload_file(audio_path)
         except Exception as e:
-
+            UserMessage(f"Failed to process audio: {e}")
             return None
 
     def _remove_media_patterns(self, text: str) -> str:
         """Remove media pattern markers from text"""
         patterns = [
-            r'<<vision:(.*?):>>',
-            r'<<video:(.*?):>>',
-            r'<<audio:(.*?):>>',
-            r'<<document:(.*?):>>',
+            r"<<vision:(.*?):>>",
+            r"<<video:(.*?):>>",
+            r"<<audio:(.*?):>>",
+            r"<<document:(.*?):>>",
         ]
 
         for pattern in patterns:
-            text = re.sub(pattern, '', text)
+            text = re.sub(pattern, "", text)
 
         return text
 
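Note on the media markers matched throughout the hunk above: `<<vision:…:>>`, `<<document:…:>>`, `<<video:…:>>`, and `<<audio:…:>>` are symai's inline convention for attaching media to a prompt string. A minimal standalone sketch of what the new `_extract_image_sources` / `_remove_media_patterns` pair does with such a string (the paths are made up for illustration, not taken from the package):

import re

VISION_PATTERN = r"<<vision:(.*?):>>"  # same pattern as in the engine above

content = "Compare the plots <<vision:/tmp/a.png:>> <<vision:/tmp/b.png:>> briefly."

# What _extract_image_sources returns: the raw sources between the markers.
sources = [m.strip() for m in re.findall(VISION_PATTERN, content)]
assert sources == ["/tmp/a.png", "/tmp/b.png"]

# What _remove_media_patterns leaves behind: plain text for the text Part.
text_only = re.sub(VISION_PATTERN, "", content)
assert text_only == "Compare the plots   briefly."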
@@ -266,24 +274,24 @@ class GeminiXReasoningEngine(Engine, GoogleMixin):
         media_content = []
 
         # Process document content
-        if '<<document:' in processed_input_str:
+        if "<<document:" in processed_input_str:
             doc_file = self._handle_document_content(processed_input_str)
             if doc_file:
                 media_content.append(doc_file)
 
         # Process image content
-        if '<<vision:' in processed_input_str:
+        if "<<vision:" in processed_input_str:
             image_files = self._handle_image_content(processed_input_str)
             media_content.extend(image_files)
 
         # Process video content
-        if '<<video:' in processed_input_str:
+        if "<<video:" in processed_input_str:
             video_file = self._handle_video_content(processed_input_str)
             if video_file:
                 media_content.append(video_file)
 
         # Process audio content
-        if '<<audio:' in processed_input_str:
+        if "<<audio:" in processed_input_str:
             audio_file = self._handle_audio_content(processed_input_str)
             if audio_file:
                 media_content.append(audio_file)
@@ -295,277 +303,312 @@ class GeminiXReasoningEngine(Engine, GoogleMixin):
         thinking_content = ""
         text_content = ""
 
-        if hasattr(res, 'candidates') and res.candidates:
+        if hasattr(res, "candidates") and res.candidates:
             candidate = res.candidates[0]
-            if hasattr(candidate, 'content') and candidate.content:
+            if hasattr(candidate, "content") and candidate.content:
                 for part in candidate.content.parts:
-                    if hasattr(part, 'text') and part.text:
-                        if hasattr(part, 'thought') and part.thought:
+                    if hasattr(part, "text") and part.text:
+                        if hasattr(part, "thought") and part.thought:
                             thinking_content += part.text
                         else:
                             text_content += part.text
 
-        return {
-            "thinking": thinking_content,
-            "text": text_content
-        }
+        return {"thinking": thinking_content, "text": text_content}
 
     def forward(self, argument):
         kwargs = argument.kwargs
-
+        _system, prompt = argument.prop.prepared_input
         payload = self._prepare_request_payload(argument)
-        except_remedy = kwargs.get('except_remedy')
+        except_remedy = kwargs.get("except_remedy")
 
-        contents = []
-        for msg in prompt:
-            role = msg['role']
-            parts_list = msg['content']
-            contents.append(types.Content(role=role, parts=parts_list))
+        contents = self._build_contents_from_prompt(prompt)
 
         try:
-            generation_config = types.GenerateContentConfig(
-
-                temperature=payload.get('temperature', 1.0),
-                top_p=payload.get('top_p', 0.95),
-                top_k=payload.get('top_k', 40),
-                stop_sequences=payload.get('stop_sequences'),
-                response_mime_type=payload.get('response_mime_type', 'text/plain'),
-            )
-
-            if payload.get('system_instruction'):
-                generation_config.system_instruction = payload['system_instruction']
-
-            if payload.get('thinking_config'):
-                generation_config.thinking_config = payload['thinking_config']
-
-            if payload.get('tools'):
-                generation_config.tools = payload['tools']
-                generation_config.automatic_function_calling=payload['automatic_function_calling']
-
-            res = self.client.models.generate_content(
-                model=kwargs.get('model', self.model),
-                contents=contents,
-                config=generation_config
-            )
-
+            generation_config = self._build_generation_config(payload)
+            res = self._generate_model_response(kwargs, contents, generation_config)
         except Exception as e:
-
-            msg = 'Google API key is not set. Please set it in the config file or pass it as an argument to the command method.'
-            logging.error(msg)
-            if self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] is None or self.config['NEUROSYMBOLIC_ENGINE_API_KEY'] == '':
-                CustomUserWarning(msg, raise_with=ValueError)
-            self.api_key = self.config['NEUROSYMBOLIC_ENGINE_API_KEY']
-            genai.configure(api_key=self.api_key)
-
-            if except_remedy is not None:
-                res = except_remedy(self, e, self.client.generate_content, argument)
-            else:
-                CustomUserWarning(f'Error during generation. Caused by: {e}', raise_with=ValueError)
+            res = self._handle_generation_error(e, except_remedy, argument)
 
-        metadata = {'raw_output': res}
-        if payload.get('tools'):
+        metadata = {"raw_output": res}
+        if payload.get("tools"):
             metadata = self._process_function_calls(res, metadata)
 
-        if kwargs.get('raw_output', False):
+        if kwargs.get("raw_output", False):
             return [res], metadata
 
         output = self._collect_response(res)
 
-        if output['thinking']:
-            metadata['thinking'] = output['thinking']
+        if output["thinking"]:
+            metadata["thinking"] = output["thinking"]
 
-        processed_text = output['text']
+        processed_text = output["text"]
         if argument.prop.response_format:
-
-            processed_text = processed_text.replace('```json', '').replace('```', '')
+            processed_text = processed_text.replace("```json", "").replace("```", "")
 
         return [processed_text], metadata
 
+    def _build_contents_from_prompt(self, prompt) -> list[types.Content]:
+        contents: list[types.Content] = []
+        for msg in prompt:
+            role = msg["role"]
+            parts_list = msg["content"]
+            contents.append(types.Content(role=role, parts=parts_list))
+        return contents
+
+    def _build_generation_config(self, payload: dict) -> types.GenerateContentConfig:
+        generation_config = types.GenerateContentConfig(
+            max_output_tokens=payload.get("max_output_tokens"),
+            temperature=payload.get("temperature", 1.0),
+            top_p=payload.get("top_p", 0.95),
+            top_k=payload.get("top_k", 40),
+            stop_sequences=payload.get("stop_sequences"),
+            response_mime_type=payload.get("response_mime_type", "text/plain"),
+        )
+        self._apply_optional_config_fields(generation_config, payload)
+        return generation_config
+
+    def _apply_optional_config_fields(
+        self, generation_config: types.GenerateContentConfig, payload: dict
+    ) -> None:
+        if payload.get("system_instruction"):
+            generation_config.system_instruction = payload["system_instruction"]
+        if payload.get("thinking_config"):
+            generation_config.thinking_config = payload["thinking_config"]
+        if payload.get("tools"):
+            generation_config.tools = payload["tools"]
+            generation_config.automatic_function_calling = payload["automatic_function_calling"]
+
+    def _generate_model_response(
+        self,
+        kwargs: dict,
+        contents: list[types.Content],
+        generation_config: types.GenerateContentConfig,
+    ):
+        return self.client.models.generate_content(
+            model=kwargs.get("model", self.model), contents=contents, config=generation_config
+        )
+
+    def _handle_generation_error(self, exception: Exception, except_remedy, argument):
+        if self.api_key is None or self.api_key == "":
+            msg = "Google API key is not set. Please set it in the config file or pass it as an argument to the command method."
+            UserMessage(msg)
+            if (
+                self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] is None
+                or self.config["NEUROSYMBOLIC_ENGINE_API_KEY"] == ""
+            ):
+                UserMessage(msg, raise_with=ValueError)
+            self.api_key = self.config["NEUROSYMBOLIC_ENGINE_API_KEY"]
+            genai.configure(api_key=self.api_key)
+        if except_remedy is not None:
+            return except_remedy(self, exception, self.client.generate_content, argument)
+        UserMessage(f"Error during generation. Caused by: {exception}", raise_with=ValueError)
+        return None
+
     def _process_function_calls(self, res, metadata):
         hit = False
-        if hasattr(res, 'candidates') and res.candidates:
+        if hasattr(res, "candidates") and res.candidates:
             candidate = res.candidates[0]
-            if hasattr(candidate, 'content') and candidate.content:
+            if hasattr(candidate, "content") and candidate.content:
                 for part in candidate.content.parts:
-                    if hasattr(part, 'function_call') and part.function_call:
+                    if hasattr(part, "function_call") and part.function_call:
                         if hit:
-
+                            UserMessage(
+                                "Multiple function calls detected in the response but only the first one will be processed."
+                            )
                             break
                         func_call = part.function_call
-                        metadata['function_call'] = {
-
-
+                        metadata["function_call"] = {
+                            "name": func_call.name,
+                            "arguments": func_call.args,
                         }
                         hit = True
         return metadata
 
     def _prepare_raw_input(self, argument):
         if not argument.prop.processed_input:
-
+            UserMessage(
+                "Need to provide a prompt instruction to the engine if `raw_input` is enabled!",
+                raise_with=ValueError,
+            )
 
         raw_prompt_data = argument.prop.processed_input
-
-        system_instruction = None
+        normalized_prompts = self._normalize_raw_prompt_data(raw_prompt_data)
+        system_instruction, non_system_messages = self._separate_system_instruction(
+            normalized_prompts
+        )
+        messages_for_api = self._build_raw_input_messages(non_system_messages)
+        return system_instruction, messages_for_api
 
+    def _normalize_raw_prompt_data(self, raw_prompt_data):
         if isinstance(raw_prompt_data, str):
-
-
-
-
+            return [{"role": "user", "content": raw_prompt_data}]
+        if isinstance(raw_prompt_data, dict):
+            return [raw_prompt_data]
+        if isinstance(raw_prompt_data, list):
             for item in raw_prompt_data:
                 if not isinstance(item, dict):
-
-
-
-
-
-
+                    UserMessage(
+                        f"Invalid item in raw_input list: {item}. Expected dict.",
+                        raise_with=ValueError,
+                    )
+            return raw_prompt_data
+        UserMessage(
+            f"Unsupported type for raw_input: {type(raw_prompt_data)}. Expected str, dict, or list of dicts.",
+            raise_with=ValueError,
+        )
+        return []
+
+    def _separate_system_instruction(self, normalized_prompts):
+        system_instruction = None
+        non_system_messages = []
         for msg in normalized_prompts:
-            role = msg.get('role')
-            content = msg.get('content')
-
+            role = msg.get("role")
+            content = msg.get("content")
             if role is None or content is None:
-
+                UserMessage(
+                    f"Message in raw_input is missing 'role' or 'content': {msg}",
+                    raise_with=ValueError,
+                )
             if not isinstance(content, str):
-
-
+                UserMessage(
+                    f"Message content for role '{role}' in raw_input must be a string. Found type: {type(content)} for content: {content}",
+                    raise_with=ValueError,
+                )
+            if role == "system":
                 if system_instruction is not None:
-
+                    UserMessage(
+                        "Only one system instruction is allowed in raw_input mode!",
+                        raise_with=ValueError,
+                    )
                 system_instruction = content
             else:
-
-
-        for msg in temp_non_system_messages:
-            content_str = str(msg.get('content', ''))
+                non_system_messages.append({"role": role, "content": content})
+        return system_instruction, non_system_messages
 
+    def _build_raw_input_messages(self, messages):
+        messages_for_api = []
+        for msg in messages:
+            content_str = str(msg.get("content", ""))
             current_message_api_parts: list[types.Part] = []
-
             image_api_parts = self._handle_image_content(content_str)
             if image_api_parts:
                 current_message_api_parts.extend(image_api_parts)
-
             text_only_content = self._remove_media_patterns(content_str)
             if text_only_content:
                 current_message_api_parts.append(types.Part(text=text_only_content))
-
             if current_message_api_parts:
-                messages_for_api.append({
-
-                    'content': current_message_api_parts
-                })
-
-        return system_instruction, messages_for_api
+                messages_for_api.append({"role": msg["role"], "content": current_message_api_parts})
+        return messages_for_api
 
     def prepare(self, argument):
-
+        # @NOTE: OpenAI compatibility at high level
        if argument.prop.raw_input:
             argument.prop.prepared_input = self._prepare_raw_input(argument)
             return
 
-
+        processed_input_str = str(argument.prop.processed_input)
+        media_content = self._process_multimodal_content(processed_input_str)
+        system_content = self._compose_system_content(argument)
+        user_content = self._compose_user_content(argument)
+        system_content, user_content = self._apply_self_prompt_if_needed(
+            argument, system_content, user_content
+        )
 
-
-
+        user_prompt = self._build_user_prompt(media_content, user_content)
+        argument.prop.prepared_input = (system_content, [user_prompt])
 
+    def _compose_system_content(self, argument) -> str:
+        system_content = ""
+        _non_verbose_output = """<META_INSTRUCTION/>\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n\n"""
         if argument.prop.suppress_verbose_output:
             system_content += _non_verbose_output
-        system_content = f'{system_content}\n' if system_content and len(system_content) > 0 else ''
-
+        system_content = f"{system_content}\n" if system_content and len(system_content) > 0 else ""
         if argument.prop.response_format:
-
-            assert
-            if
-                system_content += '<RESPONSE_FORMAT/>\nYou are a helpful assistant designed to output JSON.\n\n'
-
+            response_format = argument.prop.response_format
+            assert response_format.get("type") is not None, "Response format type is required!"
+            if response_format["type"] == "json_object":
+                system_content += (
+                    "<RESPONSE_FORMAT/>\nYou are a helpful assistant designed to output JSON.\n\n"
+                )
         ref = argument.prop.instance
         static_ctxt, dyn_ctxt = ref.global_context
         if len(static_ctxt) > 0:
             system_content += f"<STATIC_CONTEXT/>\n{static_ctxt}\n\n"
-
         if len(dyn_ctxt) > 0:
             system_content += f"<DYNAMIC_CONTEXT/>\n{dyn_ctxt}\n\n"
-
         payload = argument.prop.payload
         if argument.prop.payload:
-            system_content += f"<ADDITIONAL_CONTEXT/>\n{str(payload)}\n\n"
-
+            system_content += f"<ADDITIONAL_CONTEXT/>\n{payload!s}\n\n"
         examples: list[str] = argument.prop.examples
         if examples and len(examples) > 0:
-            system_content += f"<EXAMPLES/>\n{str(examples)}\n\n"
-
-        # Handle multimodal content
-        processed_input_str = str(argument.prop.processed_input)
-        media_content = self._process_multimodal_content(processed_input_str)
-
+            system_content += f"<EXAMPLES/>\n{examples!s}\n\n"
         if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
             val = str(argument.prop.prompt)
             val = self._remove_media_patterns(val)
             system_content += f"<INSTRUCTION/>\n{val}\n\n"
+        if argument.prop.template_suffix:
+            system_content += f" You will only generate content for the placeholder `{argument.prop.template_suffix!s}` following the instructions and the provided context information.\n\n"
+        return system_content
 
+    def _compose_user_content(self, argument) -> str:
         suffix = str(argument.prop.processed_input)
         suffix = self._remove_media_patterns(suffix)
-
+        return f"{suffix}"
 
-
-
-
-        # Handle self-prompting
-        if argument.prop.instance._kwargs.get('self_prompt', False) or argument.prop.self_prompt:
+    def _apply_self_prompt_if_needed(self, argument, system_content: str, user_content: str):
+        if argument.prop.instance._kwargs.get("self_prompt", False) or argument.prop.self_prompt:
             self_prompter = SelfPrompt()
-
             res = self_prompter(
-                {'user': user_content, 'system': system_content},
-                max_tokens=argument.kwargs.get('max_tokens', self.max_response_tokens),
-                thinking=argument.kwargs.get('thinking', None),
+                {"user": user_content, "system": system_content},
+                max_tokens=argument.kwargs.get("max_tokens", self.max_response_tokens),
+                thinking=argument.kwargs.get("thinking", None),
             )
             if res is None:
-
+                UserMessage("Self-prompting failed!", raise_with=ValueError)
+            user_content = res["user"]
+            system_content = res["system"]
+        return system_content, user_content
 
-
-
-
-        all_user_content = []
-        all_user_content.extend(media_content) #
+    def _build_user_prompt(self, media_content, user_content: str) -> dict:
+        all_user_content = list(media_content)
         if user_content.strip():
             all_user_content.append(genai.types.Part(text=user_content.strip()))
-
         if not all_user_content:
             all_user_content = [genai.types.Part(text="N/A")]
-
-        user_prompt = {'role': 'user', 'content': all_user_content}
-
-        argument.prop.prepared_input = (system_content, [user_prompt])
+        return {"role": "user", "content": all_user_content}
 
     def _prepare_request_payload(self, argument):
         kwargs = argument.kwargs
 
         payload = {
-            "max_output_tokens": kwargs.get('max_tokens', self.max_response_tokens),
-            "temperature": kwargs.get('temperature', 1.0),
-            "top_p": kwargs.get('top_p', 0.95),
-            "top_k": kwargs.get('top_k', 40),
-            "stop_sequences": kwargs.get('stop', None),
-            "stream": kwargs.get('stream', False),
+            "max_output_tokens": kwargs.get("max_tokens", self.max_response_tokens),
+            "temperature": kwargs.get("temperature", 1.0),
+            "top_p": kwargs.get("top_p", 0.95),
+            "top_k": kwargs.get("top_k", 40),
+            "stop_sequences": kwargs.get("stop", None),
+            "stream": kwargs.get("stream", False),
         }
 
         system, _ = argument.prop.prepared_input
         if system and system.strip():
-            payload['system_instruction'] = system.strip()
+            payload["system_instruction"] = system.strip()
 
-        thinking_arg = kwargs.get('thinking', None)
+        thinking_arg = kwargs.get("thinking", None)
         if thinking_arg and isinstance(thinking_arg, dict):
             thinking_budget = thinking_arg.get("thinking_budget", 1024)
-            payload['thinking_config'] = types.ThinkingConfig(include_thoughts=True, thinking_budget=thinking_budget)
+            payload["thinking_config"] = types.ThinkingConfig(
+                include_thoughts=True, thinking_budget=thinking_budget
+            )
 
-        response_format = kwargs.get('response_format', None)
-        if response_format and response_format.get('type') == 'json_object':
-            payload['response_mime_type'] = 'application/json'
+        response_format = kwargs.get("response_format", None)
+        if response_format and response_format.get("type") == "json_object":
+            payload["response_mime_type"] = "application/json"
 
-        tools = kwargs.get('tools')
+        tools = kwargs.get("tools")
         if tools:
-            payload['tools'] = self._convert_tools_format(tools)
-            payload['automatic_function_calling'] = types.AutomaticFunctionCallingConfig(
-                disable=kwargs.get('automatic_function_calling', True)
+            payload["tools"] = self._convert_tools_format(tools)
+            payload["automatic_function_calling"] = types.AutomaticFunctionCallingConfig(
+                disable=kwargs.get("automatic_function_calling", True)
             )
 
         return payload
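Note on `_prepare_request_payload` above: it translates OpenAI-style kwargs into Gemini configuration. Two mappings are easy to miss, restated standalone below (a sketch mirroring the diff, not an excerpt from the package):

# response_format={"type": "json_object"} selects Gemini's JSON output mode.
def response_mime_type(response_format: dict | None) -> str:
    if response_format and response_format.get("type") == "json_object":
        return "application/json"
    return "text/plain"

assert response_mime_type({"type": "json_object"}) == "application/json"
assert response_mime_type(None) == "text/plain"

# A thinking kwarg such as {"thinking_budget": 2048} becomes
# types.ThinkingConfig(include_thoughts=True, thinking_budget=2048),
# whose thought parts are later surfaced as metadata["thinking"] in forward().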
@@ -584,7 +627,9 @@ class GeminiXReasoningEngine(Engine, GoogleMixin):
             elif isinstance(tool_item, types.FunctionDeclaration):
                 processed_tools.append(types.Tool(function_declarations=[tool_item]))
             else:
-
+                UserMessage(
+                    f"Ignoring invalid tool format. Expected a callable, google.genai.types.Tool, or google.genai.types.FunctionDeclaration: {tool_item}"
+                )
 
         if not processed_tools:
             return None
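A note on the recurring swap of `CustomUserWarning` for `UserMessage` across these hunks: judging by the call sites, the helper reports a message and, when `raise_with=` is given, raises that exception type. The real implementation lives in `symai/utils.py`; the stand-in below is only a behavioral sketch inferred from the calls shown above, not the packaged code:

import logging

def UserMessage(message: str, raise_with: type[Exception] | None = None) -> None:
    # Hypothetical stand-in: UserMessage("...") logs a warning;
    # UserMessage("...", raise_with=ValueError) raises ValueError instead.
    logging.warning(message)
    if raise_with is not None:
        raise raise_with(message)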