ultimate-gemini-mcp 3.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +16 -0
- src/config/__init__.py +44 -0
- src/config/constants.py +68 -0
- src/config/settings.py +143 -0
- src/core/__init__.py +47 -0
- src/core/exceptions.py +62 -0
- src/core/validation.py +117 -0
- src/server.py +168 -0
- src/services/__init__.py +13 -0
- src/services/gemini_client.py +304 -0
- src/services/image_service.py +174 -0
- src/services/prompt_enhancer.py +137 -0
- src/tools/__init__.py +11 -0
- src/tools/batch_generate.py +181 -0
- src/tools/generate_image.py +240 -0
- ultimate_gemini_mcp-3.0.7.dist-info/METADATA +462 -0
- ultimate_gemini_mcp-3.0.7.dist-info/RECORD +20 -0
- ultimate_gemini_mcp-3.0.7.dist-info/WHEEL +4 -0
- ultimate_gemini_mcp-3.0.7.dist-info/entry_points.txt +2 -0
- ultimate_gemini_mcp-3.0.7.dist-info/licenses/LICENSE +31 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gemini API client for Gemini 3 Pro Image generation.
|
|
3
|
+
Uses the official Google GenAI SDK.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import base64
|
|
8
|
+
import io
|
|
9
|
+
import logging
|
|
10
|
+
from functools import partial
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from google import genai
|
|
14
|
+
from google.genai import types
|
|
15
|
+
from PIL import Image
|
|
16
|
+
|
|
17
|
+
from ..config.constants import GEMINI_MODELS
|
|
18
|
+
from ..core.exceptions import (
|
|
19
|
+
APIError,
|
|
20
|
+
AuthenticationError,
|
|
21
|
+
ContentPolicyError,
|
|
22
|
+
RateLimitError,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class GeminiClient:
|
|
29
|
+
"""Client for Gemini 3 Pro Image API using official Google GenAI SDK."""
|
|
30
|
+
|
|
31
|
+
def __init__(self, api_key: str, timeout: int = 60):
|
|
32
|
+
"""
|
|
33
|
+
Initialize Gemini client.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
api_key: Gemini API key
|
|
37
|
+
timeout: Request timeout in seconds
|
|
38
|
+
"""
|
|
39
|
+
self.api_key = api_key
|
|
40
|
+
self.timeout = timeout
|
|
41
|
+
self.client = genai.Client(api_key=api_key)
|
|
42
|
+
|
|
43
|
+
async def generate_image(
|
|
44
|
+
self,
|
|
45
|
+
prompt: str,
|
|
46
|
+
*,
|
|
47
|
+
model: str = "gemini-3-pro-image-preview",
|
|
48
|
+
reference_images: list[str] | None = None,
|
|
49
|
+
aspect_ratio: str | None = None,
|
|
50
|
+
image_size: str = "2K",
|
|
51
|
+
response_modalities: list[str] | None = None,
|
|
52
|
+
enable_google_search: bool = False,
|
|
53
|
+
**kwargs: Any,
|
|
54
|
+
) -> dict[str, Any]:
|
|
55
|
+
"""
|
|
56
|
+
Generate or edit an image using Gemini 3 Pro Image.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
prompt: Text prompt for image generation or editing instruction
|
|
60
|
+
model: Model to use (default: gemini-3-pro-image-preview)
|
|
61
|
+
reference_images: List of base64-encoded reference images (up to 14)
|
|
62
|
+
aspect_ratio: Desired aspect ratio (optional)
|
|
63
|
+
image_size: Image resolution (1K, 2K, 4K - default: 2K)
|
|
64
|
+
response_modalities: Response types (TEXT, IMAGE - default: ["TEXT", "IMAGE"])
|
|
65
|
+
enable_google_search: Enable Google Search grounding for real-time data
|
|
66
|
+
**kwargs: Additional parameters
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
Dict with 'images' key containing list of base64-encoded image data,
|
|
70
|
+
'thoughts' key for thinking process, and 'text' key for text responses
|
|
71
|
+
|
|
72
|
+
Raises:
|
|
73
|
+
APIError: If the API request fails
|
|
74
|
+
"""
|
|
75
|
+
model_id = GEMINI_MODELS.get(model, model)
|
|
76
|
+
|
|
77
|
+
try:
|
|
78
|
+
# Build contents list with reference images and prompt
|
|
79
|
+
contents: list[Any] = []
|
|
80
|
+
|
|
81
|
+
# Add reference images if provided (up to 14)
|
|
82
|
+
if reference_images:
|
|
83
|
+
for ref_image_b64 in reference_images[:14]: # Limit to max 14
|
|
84
|
+
# Decode base64 to bytes for PIL Image
|
|
85
|
+
image_bytes = base64.b64decode(ref_image_b64)
|
|
86
|
+
image = Image.open(io.BytesIO(image_bytes))
|
|
87
|
+
contents.append(image)
|
|
88
|
+
|
|
89
|
+
# Add text prompt
|
|
90
|
+
contents.append(prompt)
|
|
91
|
+
|
|
92
|
+
# Build configuration
|
|
93
|
+
if response_modalities is None:
|
|
94
|
+
response_modalities = ["TEXT", "IMAGE"]
|
|
95
|
+
|
|
96
|
+
# Build image config (SDK 1.52+ supports both aspect_ratio and image_size)
|
|
97
|
+
image_config = types.ImageConfig(
|
|
98
|
+
aspect_ratio=aspect_ratio if aspect_ratio else None,
|
|
99
|
+
image_size=image_size if image_size else None,
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
# Build generation config
|
|
103
|
+
config_args: dict[str, Any] = {
|
|
104
|
+
"response_modalities": response_modalities,
|
|
105
|
+
"image_config": image_config,
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
# Add Google Search grounding if enabled
|
|
109
|
+
if enable_google_search:
|
|
110
|
+
config_args["tools"] = [{"google_search": {}}]
|
|
111
|
+
|
|
112
|
+
config = types.GenerateContentConfig(**config_args)
|
|
113
|
+
|
|
114
|
+
logger.info(f"Generating image with model: {model_id}")
|
|
115
|
+
logger.info(f"Contents: {len(contents)} items")
|
|
116
|
+
logger.info(f"Config: {config}")
|
|
117
|
+
logger.info(f"Aspect ratio: {aspect_ratio}, Image size: {image_size}")
|
|
118
|
+
|
|
119
|
+
# Generate content using official SDK (run in executor since it's synchronous)
|
|
120
|
+
loop = asyncio.get_event_loop()
|
|
121
|
+
response = await loop.run_in_executor(
|
|
122
|
+
None,
|
|
123
|
+
partial(
|
|
124
|
+
self.client.models.generate_content,
|
|
125
|
+
model=model_id,
|
|
126
|
+
contents=contents,
|
|
127
|
+
config=config,
|
|
128
|
+
),
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
# Extract images, thoughts, and text from response
|
|
132
|
+
extraction_result = self._extract_content_from_response(response)
|
|
133
|
+
images = extraction_result["images"]
|
|
134
|
+
thoughts = extraction_result["thoughts"]
|
|
135
|
+
text_parts = extraction_result["text"]
|
|
136
|
+
|
|
137
|
+
if not images and "IMAGE" in response_modalities:
|
|
138
|
+
logger.error(
|
|
139
|
+
f"No images extracted from response. Response has {len(response.parts)} parts"
|
|
140
|
+
)
|
|
141
|
+
logger.error(f"Thoughts extracted: {len(thoughts)}, Text parts: {len(text_parts)}")
|
|
142
|
+
logger.error(f"Response_modalities: {response_modalities}")
|
|
143
|
+
for idx, part in enumerate(response.parts):
|
|
144
|
+
logger.error(
|
|
145
|
+
f" Part {idx}: has_inline_data={hasattr(part, 'inline_data')}, has_text={hasattr(part, 'text')}, thought={getattr(part, 'thought', None)}, thought_sig={hasattr(part, 'thought_signature')}"
|
|
146
|
+
)
|
|
147
|
+
raise APIError("No image data found in Gemini API response")
|
|
148
|
+
|
|
149
|
+
result = {
|
|
150
|
+
"images": images,
|
|
151
|
+
"text": text_parts,
|
|
152
|
+
"thoughts": thoughts,
|
|
153
|
+
"model": model,
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
# Include grounding metadata if Google Search was used
|
|
157
|
+
if enable_google_search and hasattr(response, "grounding_metadata"):
|
|
158
|
+
result["grounding_metadata"] = response.grounding_metadata
|
|
159
|
+
|
|
160
|
+
return result
|
|
161
|
+
|
|
162
|
+
except Exception as e:
|
|
163
|
+
logger.error(f"Gemini API request failed: {e}")
|
|
164
|
+
self._handle_exception(e)
|
|
165
|
+
raise APIError(f"Gemini API request failed: {e}") from e
|
|
166
|
+
|
|
167
|
+
async def generate_text(
|
|
168
|
+
self,
|
|
169
|
+
prompt: str,
|
|
170
|
+
*,
|
|
171
|
+
model: str = "gemini-flash-latest",
|
|
172
|
+
system_instruction: str | None = None,
|
|
173
|
+
) -> str:
|
|
174
|
+
"""
|
|
175
|
+
Generate text using Gemini (for prompt enhancement).
|
|
176
|
+
|
|
177
|
+
Args:
|
|
178
|
+
prompt: Text prompt
|
|
179
|
+
model: Model to use
|
|
180
|
+
system_instruction: Optional system instruction
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
Generated text response
|
|
184
|
+
"""
|
|
185
|
+
model_id = GEMINI_MODELS.get(model, model)
|
|
186
|
+
|
|
187
|
+
try:
|
|
188
|
+
# Build config with proper types instead of using **kwargs
|
|
189
|
+
config = (
|
|
190
|
+
types.GenerateContentConfig(system_instruction=system_instruction)
|
|
191
|
+
if system_instruction
|
|
192
|
+
else None
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
# Run in executor since genai SDK is synchronous
|
|
196
|
+
loop = asyncio.get_event_loop()
|
|
197
|
+
response = await loop.run_in_executor(
|
|
198
|
+
None,
|
|
199
|
+
partial(
|
|
200
|
+
self.client.models.generate_content,
|
|
201
|
+
model=model_id,
|
|
202
|
+
contents=prompt,
|
|
203
|
+
config=config,
|
|
204
|
+
),
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
# Extract text from response
|
|
208
|
+
return response.text or ""
|
|
209
|
+
|
|
210
|
+
except Exception as e:
|
|
211
|
+
logger.error(f"Gemini text generation failed: {e}")
|
|
212
|
+
raise APIError(f"Gemini text generation failed: {e}") from e
|
|
213
|
+
|
|
214
|
+
def _extract_content_from_response(self, response: Any) -> dict[str, Any]:
|
|
215
|
+
"""
|
|
216
|
+
Extract images, text, and thoughts from Gemini SDK response.
|
|
217
|
+
|
|
218
|
+
The genai SDK automatically handles thought signatures, so we just
|
|
219
|
+
need to extract the content.
|
|
220
|
+
|
|
221
|
+
Returns dict with keys:
|
|
222
|
+
- images: List of base64-encoded image data
|
|
223
|
+
- text: List of text strings
|
|
224
|
+
- thoughts: List of thought objects with images and text
|
|
225
|
+
"""
|
|
226
|
+
images: list[str] = []
|
|
227
|
+
text_parts: list[str] = []
|
|
228
|
+
thoughts: list[dict[str, Any]] = []
|
|
229
|
+
|
|
230
|
+
try:
|
|
231
|
+
logger.info(f"Response has {len(response.parts)} parts")
|
|
232
|
+
# Iterate through all parts in the response
|
|
233
|
+
for idx, part in enumerate(response.parts):
|
|
234
|
+
logger.info(
|
|
235
|
+
f"Part {idx}: type={type(part)}, has_inline_data={hasattr(part, 'inline_data')}, has_text={hasattr(part, 'text')}, has_thought={hasattr(part, 'thought')}, has_thought_sig={hasattr(part, 'thought_signature')}"
|
|
236
|
+
)
|
|
237
|
+
# Check if this is a thought (thinking process)
|
|
238
|
+
is_thought = getattr(part, "thought", False)
|
|
239
|
+
|
|
240
|
+
# Extract image data using SDK's as_image() method
|
|
241
|
+
if hasattr(part, "inline_data"):
|
|
242
|
+
try:
|
|
243
|
+
logger.info(f"Part {idx} has inline_data, attempting to extract image...")
|
|
244
|
+
image = part.as_image()
|
|
245
|
+
if image:
|
|
246
|
+
logger.info(f"Successfully got PIL image: {image.size}")
|
|
247
|
+
# Convert PIL Image to base64
|
|
248
|
+
buffer = io.BytesIO()
|
|
249
|
+
# Save as PNG - use positional argument instead of keyword
|
|
250
|
+
image.save(buffer, "PNG")
|
|
251
|
+
image_b64 = base64.b64encode(buffer.getvalue()).decode()
|
|
252
|
+
|
|
253
|
+
if is_thought:
|
|
254
|
+
logger.info(f"Adding to thoughts (is_thought={is_thought})")
|
|
255
|
+
thoughts.append(
|
|
256
|
+
{"type": "image", "data": image_b64, "index": len(thoughts)}
|
|
257
|
+
)
|
|
258
|
+
else:
|
|
259
|
+
logger.info(f"Adding to images (is_thought={is_thought})")
|
|
260
|
+
images.append(image_b64)
|
|
261
|
+
else:
|
|
262
|
+
logger.warning(f"Part {idx}: as_image() returned None")
|
|
263
|
+
except Exception as e:
|
|
264
|
+
logger.error(f"Could not extract image from part {idx}: {e}", exc_info=True)
|
|
265
|
+
|
|
266
|
+
# Extract text
|
|
267
|
+
if hasattr(part, "text") and part.text:
|
|
268
|
+
if is_thought:
|
|
269
|
+
thoughts.append({"type": "text", "data": part.text, "index": len(thoughts)})
|
|
270
|
+
else:
|
|
271
|
+
text_parts.append(part.text)
|
|
272
|
+
|
|
273
|
+
except Exception as e:
|
|
274
|
+
logger.error(f"Error extracting content from response: {e}", exc_info=True)
|
|
275
|
+
|
|
276
|
+
logger.info(
|
|
277
|
+
f"Extraction complete: {len(images)} images, {len(text_parts)} text parts, {len(thoughts)} thoughts"
|
|
278
|
+
)
|
|
279
|
+
return {
|
|
280
|
+
"images": images,
|
|
281
|
+
"text": text_parts,
|
|
282
|
+
"thoughts": thoughts,
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
def _handle_exception(self, error: Exception) -> None:
|
|
286
|
+
"""Handle exceptions from genai SDK."""
|
|
287
|
+
error_msg = str(error)
|
|
288
|
+
|
|
289
|
+
logger.error(f"API request failed: {error_msg}")
|
|
290
|
+
|
|
291
|
+
# Try to determine error type from message
|
|
292
|
+
if "authentication" in error_msg.lower() or "api key" in error_msg.lower():
|
|
293
|
+
raise AuthenticationError("Authentication failed. Please check your Gemini API key.")
|
|
294
|
+
elif "rate limit" in error_msg.lower() or "quota" in error_msg.lower():
|
|
295
|
+
raise RateLimitError("Rate limit exceeded. Please try again later.")
|
|
296
|
+
elif "safety" in error_msg.lower() or "blocked" in error_msg.lower():
|
|
297
|
+
raise ContentPolicyError(
|
|
298
|
+
"Content was blocked by safety filters. Please modify your prompt."
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
async def close(self) -> None:
|
|
302
|
+
"""Close the Gemini client (genai SDK handles cleanup automatically)."""
|
|
303
|
+
# genai SDK doesn't require explicit cleanup
|
|
304
|
+
pass
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Image service for Gemini 3 Pro Image API.
|
|
3
|
+
Provides interface for image generation using Gemini 3 Pro Image.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import base64
|
|
7
|
+
import logging
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ..config.constants import GEMINI_MODELS
|
|
13
|
+
from ..core import sanitize_filename
|
|
14
|
+
from ..core.exceptions import ImageProcessingError
|
|
15
|
+
from .gemini_client import GeminiClient
|
|
16
|
+
from .prompt_enhancer import PromptEnhancer
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ImageResult:
|
|
22
|
+
"""Container for generated image data and metadata."""
|
|
23
|
+
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
image_data: str,
|
|
27
|
+
prompt: str,
|
|
28
|
+
model: str,
|
|
29
|
+
index: int = 0,
|
|
30
|
+
metadata: dict[str, Any] | None = None,
|
|
31
|
+
):
|
|
32
|
+
self.image_data = image_data # Base64-encoded
|
|
33
|
+
self.prompt = prompt
|
|
34
|
+
self.model = model
|
|
35
|
+
self.index = index
|
|
36
|
+
self.metadata = metadata or {}
|
|
37
|
+
self.timestamp = datetime.now()
|
|
38
|
+
|
|
39
|
+
def save(self, output_dir: Path, filename: str | None = None) -> Path:
|
|
40
|
+
"""Save image to disk."""
|
|
41
|
+
if filename is None:
|
|
42
|
+
filename = self._generate_filename()
|
|
43
|
+
|
|
44
|
+
output_path = output_dir / filename
|
|
45
|
+
|
|
46
|
+
try:
|
|
47
|
+
# Decode base64 and save
|
|
48
|
+
image_bytes = base64.b64decode(self.image_data)
|
|
49
|
+
output_path.write_bytes(image_bytes)
|
|
50
|
+
logger.info(f"Saved image to {output_path}")
|
|
51
|
+
return output_path
|
|
52
|
+
except Exception as e:
|
|
53
|
+
raise ImageProcessingError(f"Failed to save image: {e}") from e
|
|
54
|
+
|
|
55
|
+
def _generate_filename(self) -> str:
|
|
56
|
+
"""Generate clean, short filename."""
|
|
57
|
+
timestamp = self.timestamp.strftime("%Y%m%d_%H%M%S")
|
|
58
|
+
# Shorten model name
|
|
59
|
+
model_short = self.model.replace("gemini-3-pro-image-preview", "gemini3").replace(
|
|
60
|
+
"imagen-4-", "img4-"
|
|
61
|
+
)
|
|
62
|
+
# Sanitize and shorten prompt (max 30 chars)
|
|
63
|
+
prompt_snippet = sanitize_filename(self.prompt[:30])
|
|
64
|
+
index_str = f"_{self.index + 1}" if self.index > 0 else ""
|
|
65
|
+
return f"{model_short}_{timestamp}_{prompt_snippet}{index_str}.png"
|
|
66
|
+
|
|
67
|
+
def get_size(self) -> int:
|
|
68
|
+
"""Get image size in bytes."""
|
|
69
|
+
return len(base64.b64decode(self.image_data))
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class ImageService:
    """High-level image generation service backed by Gemini 3 Pro Image."""

    def __init__(self, api_key: str, *, enable_enhancement: bool = True, timeout: int = 60):
        """
        Set up the Gemini client and, optionally, the prompt enhancer.

        Args:
            api_key: API key for Gemini API
            enable_enhancement: Whether a prompt enhancer is created
            timeout: Request timeout in seconds, forwarded to the client
        """
        self.api_key = api_key
        self.enable_enhancement = enable_enhancement
        self.timeout = timeout

        self.gemini_client = GeminiClient(api_key, timeout)
        # The enhancer shares the image client rather than opening its own.
        self.prompt_enhancer: PromptEnhancer | None = (
            PromptEnhancer(self.gemini_client) if enable_enhancement else None
        )

    async def generate(
        self, prompt: str, *, model: str | None = None, enhance_prompt: bool = True, **kwargs: Any
    ) -> list[ImageResult]:
        """
        Produce images for a prompt, enhancing the prompt first when enabled.

        Args:
            prompt: Text prompt for image generation
            model: Model to use (default: gemini-3-pro-image-preview)
            enhance_prompt: Whether to enhance the prompt for this call
            **kwargs: Additional parameters (aspect_ratio, reference_images, etc.)
                passed through to the Gemini client

        Returns:
            List of ImageResult objects

        Raises:
            ValueError: If the model is not a key of GEMINI_MODELS
        """
        chosen_model = "gemini-3-pro-image-preview" if model is None else model
        if chosen_model not in GEMINI_MODELS:
            raise ValueError(
                f"Unknown model: {chosen_model}. Only Gemini 3 Pro Image is supported."
            )

        context = self._build_enhancement_context(kwargs)
        final_prompt = prompt

        wants_enhancement = enhance_prompt and self.enable_enhancement and self.prompt_enhancer
        if wants_enhancement:
            try:
                enhancement = await self.prompt_enhancer.enhance_prompt(prompt, context=context)
                final_prompt = enhancement["enhanced_prompt"]
                logger.info(f"Prompt enhanced: {len(prompt)} -> {len(final_prompt)} chars")
            except Exception as exc:
                # Enhancement is optional; carry on with the prompt we have.
                logger.warning(f"Prompt enhancement failed: {exc}")

        return await self._generate_with_gemini(final_prompt, chosen_model, prompt, kwargs)

    async def _generate_with_gemini(
        self, prompt: str, model: str, original_prompt: str, params: dict[str, Any]
    ) -> list[ImageResult]:
        """Call the Gemini client and wrap each returned image in an ImageResult."""
        response = await self.gemini_client.generate_image(prompt=prompt, model=model, **params)

        return [
            ImageResult(
                image_data=encoded,
                prompt=original_prompt,
                model=model,
                index=position,
                # Each result receives its own metadata dict.
                metadata={"enhanced_prompt": prompt, "api": "gemini", **params},
            )
            for position, encoded in enumerate(response["images"])
        ]

    def _build_enhancement_context(self, params: dict[str, Any]) -> dict[str, Any]:
        """Derive the prompt-enhancement hints from the generation parameters."""
        hints: dict[str, Any] = {}

        references = params.get("reference_images")
        if references:
            hints["has_reference_images"] = True
            hints["num_reference_images"] = len(references)

        # Copied whenever the key is present, whatever its value.
        if "aspect_ratio" in params:
            hints["aspect_ratio"] = params["aspect_ratio"]

        if params.get("enable_google_search"):
            hints["use_google_search"] = True

        return hints

    async def close(self) -> None:
        """Close Gemini client."""
        await self.gemini_client.close()
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Prompt enhancement service using Gemini Flash.
|
|
3
|
+
Automatically optimizes prompts for better image generation results.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from .gemini_client import GeminiClient
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
PROMPT_ENHANCEMENT_SYSTEM_INSTRUCTION = """You are an expert prompt engineer for AI image generation models. Your task is to enhance user prompts to produce the best possible results.
|
|
15
|
+
|
|
16
|
+
Follow these guidelines:
|
|
17
|
+
1. Preserve the user's core intent and subject matter
|
|
18
|
+
2. Add specific, professional details about:
|
|
19
|
+
- Composition (framing, perspective, angle)
|
|
20
|
+
- Lighting (type, quality, direction, mood)
|
|
21
|
+
- Materials and textures
|
|
22
|
+
- Atmosphere and mood
|
|
23
|
+
- Artistic style (if appropriate)
|
|
24
|
+
3. Use photographic and cinematic terminology when relevant
|
|
25
|
+
4. Be hyper-specific rather than generic
|
|
26
|
+
5. For portraits: describe features, expressions, clothing
|
|
27
|
+
6. For scenes: describe environment, weather, time of day
|
|
28
|
+
7. Keep prompts concise but detailed (aim for 100-300 words)
|
|
29
|
+
8. NEVER use hex color values (like #FF0000). Always describe colors using natural language (e.g., "dark red", "neon blue", "warm amber", "deep crimson")
|
|
30
|
+
9. Output ONLY the enhanced prompt, no explanations"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class PromptEnhancer:
    """Service for enhancing image generation prompts."""

    def __init__(self, gemini_client: GeminiClient):
        """
        Initialize prompt enhancer.

        Args:
            gemini_client: Gemini client for text generation
        """
        self.gemini_client = gemini_client

    async def enhance_prompt(
        self,
        original_prompt: str,
        *,
        context: dict[str, Any] | None = None,
    ) -> dict[str, str]:
        """
        Enhance a prompt for better image generation.

        Never raises for enhancement failures: on any error, or when the model
        returns no usable text, the original prompt is returned as the
        enhanced prompt.

        Args:
            original_prompt: Original user prompt
            context: Optional hints; the keys read are is_editing,
                maintain_character_consistency, blend_images,
                use_world_knowledge and aspect_ratio

        Returns:
            Dict with 'enhanced_prompt' and 'original_prompt'
        """
        # Build enhancement instruction
        instruction = self._build_enhancement_instruction(original_prompt, context)

        try:
            enhanced = await self.gemini_client.generate_text(
                prompt=instruction,
                system_instruction=PROMPT_ENHANCEMENT_SYSTEM_INSTRUCTION,
                model="gemini-flash-latest",
            )

            # Clean up the enhanced prompt
            enhanced = enhanced.strip()

            if not enhanced:
                # An empty reply would otherwise replace the user's prompt
                # with nothing.
                logger.warning("Prompt enhancement returned empty text, using original")
                enhanced = original_prompt

            logger.info(f"Enhanced prompt: {len(original_prompt)} -> {len(enhanced)} chars")

            return {
                "original_prompt": original_prompt,
                "enhanced_prompt": enhanced,
            }

        except Exception as e:
            logger.warning(f"Prompt enhancement failed, using original: {e}")
            return {
                "original_prompt": original_prompt,
                "enhanced_prompt": original_prompt,
            }

    @staticmethod
    def _orientation_hint(ratio: Any) -> str | None:
        """Return the format hint for a "W:H" ratio, or None when none applies."""
        try:
            width_text, height_text = str(ratio).split(":")
            width, height = float(width_text), float(height_text)
        except ValueError:
            # Not a "W:H" pair of numbers: no hint.
            return None

        if width <= 0 or height <= 0:
            return None
        if height > width:
            return "\nFormat: Vertical/portrait composition"
        # Only 16:9 and wider count as "wide"; 3:2, 4:3 etc. get no hint.
        if width / height >= 16 / 9:
            return "\nFormat: Wide landscape composition"
        return None

    def _build_enhancement_instruction(self, prompt: str, context: dict[str, Any] | None) -> str:
        """
        Build the instruction for prompt enhancement.

        Starts from the user's prompt and appends one hint per recognised,
        truthy context key. Pieces are joined with a newline, and each hint
        itself begins with a newline, so hints are separated by a blank line.
        """
        instruction_parts = [f"Enhance this image generation prompt:\n\n{prompt}"]

        if context:
            # Add context hints
            if context.get("is_editing"):
                instruction_parts.append("\nContext: This is for image editing/modification")

            if context.get("maintain_character_consistency"):
                instruction_parts.append(
                    "\nIMPORTANT: Describe the character with specific, consistent features "
                    "for use across multiple generations"
                )

            if context.get("blend_images"):
                instruction_parts.append(
                    "\nContext: Multiple images will be blended. Describe how elements "
                    "should be composed naturally together"
                )

            if context.get("use_world_knowledge"):
                instruction_parts.append(
                    "\nContext: Include accurate real-world details for historical figures, "
                    "landmarks, or factual scenarios"
                )

            ratio = context.get("aspect_ratio")
            if ratio:
                # Derived numerically so every portrait ratio (e.g. 4:5) is
                # covered, not only a fixed list.
                hint = self._orientation_hint(ratio)
                if hint is not None:
                    instruction_parts.append(hint)

        return "\n".join(instruction_parts)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
async def create_prompt_enhancer(api_key: str, timeout: int = 30) -> PromptEnhancer:
    """
    Build a PromptEnhancer backed by a newly created GeminiClient.

    Args:
        api_key: Gemini API key
        timeout: Request timeout passed to the client

    Returns:
        PromptEnhancer instance wrapping the new client
    """
    return PromptEnhancer(GeminiClient(api_key=api_key, timeout=timeout))
|
src/tools/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Tools module for Ultimate Gemini MCP."""
|
|
2
|
+
|
|
3
|
+
from .batch_generate import batch_generate_images, register_batch_generate_tool
|
|
4
|
+
from .generate_image import generate_image_tool, register_generate_image_tool
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"generate_image_tool",
|
|
8
|
+
"register_generate_image_tool",
|
|
9
|
+
"batch_generate_images",
|
|
10
|
+
"register_batch_generate_tool",
|
|
11
|
+
]
|