ultimate-gemini-mcp 3.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,304 @@
1
+ """
2
+ Gemini API client for Gemini 3 Pro Image generation.
3
+ Uses the official Google GenAI SDK.
4
+ """
5
+
6
+ import asyncio
7
+ import base64
8
+ import io
9
+ import logging
10
+ from functools import partial
11
+ from typing import Any
12
+
13
+ from google import genai
14
+ from google.genai import types
15
+ from PIL import Image
16
+
17
+ from ..config.constants import GEMINI_MODELS
18
+ from ..core.exceptions import (
19
+ APIError,
20
+ AuthenticationError,
21
+ ContentPolicyError,
22
+ RateLimitError,
23
+ )
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ class GeminiClient:
29
+ """Client for Gemini 3 Pro Image API using official Google GenAI SDK."""
30
+
31
+ def __init__(self, api_key: str, timeout: int = 60):
32
+ """
33
+ Initialize Gemini client.
34
+
35
+ Args:
36
+ api_key: Gemini API key
37
+ timeout: Request timeout in seconds
38
+ """
39
+ self.api_key = api_key
40
+ self.timeout = timeout
41
+ self.client = genai.Client(api_key=api_key)
42
+
43
+ async def generate_image(
44
+ self,
45
+ prompt: str,
46
+ *,
47
+ model: str = "gemini-3-pro-image-preview",
48
+ reference_images: list[str] | None = None,
49
+ aspect_ratio: str | None = None,
50
+ image_size: str = "2K",
51
+ response_modalities: list[str] | None = None,
52
+ enable_google_search: bool = False,
53
+ **kwargs: Any,
54
+ ) -> dict[str, Any]:
55
+ """
56
+ Generate or edit an image using Gemini 3 Pro Image.
57
+
58
+ Args:
59
+ prompt: Text prompt for image generation or editing instruction
60
+ model: Model to use (default: gemini-3-pro-image-preview)
61
+ reference_images: List of base64-encoded reference images (up to 14)
62
+ aspect_ratio: Desired aspect ratio (optional)
63
+ image_size: Image resolution (1K, 2K, 4K - default: 2K)
64
+ response_modalities: Response types (TEXT, IMAGE - default: ["TEXT", "IMAGE"])
65
+ enable_google_search: Enable Google Search grounding for real-time data
66
+ **kwargs: Additional parameters
67
+
68
+ Returns:
69
+ Dict with 'images' key containing list of base64-encoded image data,
70
+ 'thoughts' key for thinking process, and 'text' key for text responses
71
+
72
+ Raises:
73
+ APIError: If the API request fails
74
+ """
75
+ model_id = GEMINI_MODELS.get(model, model)
76
+
77
+ try:
78
+ # Build contents list with reference images and prompt
79
+ contents: list[Any] = []
80
+
81
+ # Add reference images if provided (up to 14)
82
+ if reference_images:
83
+ for ref_image_b64 in reference_images[:14]: # Limit to max 14
84
+ # Decode base64 to bytes for PIL Image
85
+ image_bytes = base64.b64decode(ref_image_b64)
86
+ image = Image.open(io.BytesIO(image_bytes))
87
+ contents.append(image)
88
+
89
+ # Add text prompt
90
+ contents.append(prompt)
91
+
92
+ # Build configuration
93
+ if response_modalities is None:
94
+ response_modalities = ["TEXT", "IMAGE"]
95
+
96
+ # Build image config (SDK 1.52+ supports both aspect_ratio and image_size)
97
+ image_config = types.ImageConfig(
98
+ aspect_ratio=aspect_ratio if aspect_ratio else None,
99
+ image_size=image_size if image_size else None,
100
+ )
101
+
102
+ # Build generation config
103
+ config_args: dict[str, Any] = {
104
+ "response_modalities": response_modalities,
105
+ "image_config": image_config,
106
+ }
107
+
108
+ # Add Google Search grounding if enabled
109
+ if enable_google_search:
110
+ config_args["tools"] = [{"google_search": {}}]
111
+
112
+ config = types.GenerateContentConfig(**config_args)
113
+
114
+ logger.info(f"Generating image with model: {model_id}")
115
+ logger.info(f"Contents: {len(contents)} items")
116
+ logger.info(f"Config: {config}")
117
+ logger.info(f"Aspect ratio: {aspect_ratio}, Image size: {image_size}")
118
+
119
+ # Generate content using official SDK (run in executor since it's synchronous)
120
+ loop = asyncio.get_event_loop()
121
+ response = await loop.run_in_executor(
122
+ None,
123
+ partial(
124
+ self.client.models.generate_content,
125
+ model=model_id,
126
+ contents=contents,
127
+ config=config,
128
+ ),
129
+ )
130
+
131
+ # Extract images, thoughts, and text from response
132
+ extraction_result = self._extract_content_from_response(response)
133
+ images = extraction_result["images"]
134
+ thoughts = extraction_result["thoughts"]
135
+ text_parts = extraction_result["text"]
136
+
137
+ if not images and "IMAGE" in response_modalities:
138
+ logger.error(
139
+ f"No images extracted from response. Response has {len(response.parts)} parts"
140
+ )
141
+ logger.error(f"Thoughts extracted: {len(thoughts)}, Text parts: {len(text_parts)}")
142
+ logger.error(f"Response_modalities: {response_modalities}")
143
+ for idx, part in enumerate(response.parts):
144
+ logger.error(
145
+ f" Part {idx}: has_inline_data={hasattr(part, 'inline_data')}, has_text={hasattr(part, 'text')}, thought={getattr(part, 'thought', None)}, thought_sig={hasattr(part, 'thought_signature')}"
146
+ )
147
+ raise APIError("No image data found in Gemini API response")
148
+
149
+ result = {
150
+ "images": images,
151
+ "text": text_parts,
152
+ "thoughts": thoughts,
153
+ "model": model,
154
+ }
155
+
156
+ # Include grounding metadata if Google Search was used
157
+ if enable_google_search and hasattr(response, "grounding_metadata"):
158
+ result["grounding_metadata"] = response.grounding_metadata
159
+
160
+ return result
161
+
162
+ except Exception as e:
163
+ logger.error(f"Gemini API request failed: {e}")
164
+ self._handle_exception(e)
165
+ raise APIError(f"Gemini API request failed: {e}") from e
166
+
167
+ async def generate_text(
168
+ self,
169
+ prompt: str,
170
+ *,
171
+ model: str = "gemini-flash-latest",
172
+ system_instruction: str | None = None,
173
+ ) -> str:
174
+ """
175
+ Generate text using Gemini (for prompt enhancement).
176
+
177
+ Args:
178
+ prompt: Text prompt
179
+ model: Model to use
180
+ system_instruction: Optional system instruction
181
+
182
+ Returns:
183
+ Generated text response
184
+ """
185
+ model_id = GEMINI_MODELS.get(model, model)
186
+
187
+ try:
188
+ # Build config with proper types instead of using **kwargs
189
+ config = (
190
+ types.GenerateContentConfig(system_instruction=system_instruction)
191
+ if system_instruction
192
+ else None
193
+ )
194
+
195
+ # Run in executor since genai SDK is synchronous
196
+ loop = asyncio.get_event_loop()
197
+ response = await loop.run_in_executor(
198
+ None,
199
+ partial(
200
+ self.client.models.generate_content,
201
+ model=model_id,
202
+ contents=prompt,
203
+ config=config,
204
+ ),
205
+ )
206
+
207
+ # Extract text from response
208
+ return response.text or ""
209
+
210
+ except Exception as e:
211
+ logger.error(f"Gemini text generation failed: {e}")
212
+ raise APIError(f"Gemini text generation failed: {e}") from e
213
+
214
+ def _extract_content_from_response(self, response: Any) -> dict[str, Any]:
215
+ """
216
+ Extract images, text, and thoughts from Gemini SDK response.
217
+
218
+ The genai SDK automatically handles thought signatures, so we just
219
+ need to extract the content.
220
+
221
+ Returns dict with keys:
222
+ - images: List of base64-encoded image data
223
+ - text: List of text strings
224
+ - thoughts: List of thought objects with images and text
225
+ """
226
+ images: list[str] = []
227
+ text_parts: list[str] = []
228
+ thoughts: list[dict[str, Any]] = []
229
+
230
+ try:
231
+ logger.info(f"Response has {len(response.parts)} parts")
232
+ # Iterate through all parts in the response
233
+ for idx, part in enumerate(response.parts):
234
+ logger.info(
235
+ f"Part {idx}: type={type(part)}, has_inline_data={hasattr(part, 'inline_data')}, has_text={hasattr(part, 'text')}, has_thought={hasattr(part, 'thought')}, has_thought_sig={hasattr(part, 'thought_signature')}"
236
+ )
237
+ # Check if this is a thought (thinking process)
238
+ is_thought = getattr(part, "thought", False)
239
+
240
+ # Extract image data using SDK's as_image() method
241
+ if hasattr(part, "inline_data"):
242
+ try:
243
+ logger.info(f"Part {idx} has inline_data, attempting to extract image...")
244
+ image = part.as_image()
245
+ if image:
246
+ logger.info(f"Successfully got PIL image: {image.size}")
247
+ # Convert PIL Image to base64
248
+ buffer = io.BytesIO()
249
+ # Save as PNG - use positional argument instead of keyword
250
+ image.save(buffer, "PNG")
251
+ image_b64 = base64.b64encode(buffer.getvalue()).decode()
252
+
253
+ if is_thought:
254
+ logger.info(f"Adding to thoughts (is_thought={is_thought})")
255
+ thoughts.append(
256
+ {"type": "image", "data": image_b64, "index": len(thoughts)}
257
+ )
258
+ else:
259
+ logger.info(f"Adding to images (is_thought={is_thought})")
260
+ images.append(image_b64)
261
+ else:
262
+ logger.warning(f"Part {idx}: as_image() returned None")
263
+ except Exception as e:
264
+ logger.error(f"Could not extract image from part {idx}: {e}", exc_info=True)
265
+
266
+ # Extract text
267
+ if hasattr(part, "text") and part.text:
268
+ if is_thought:
269
+ thoughts.append({"type": "text", "data": part.text, "index": len(thoughts)})
270
+ else:
271
+ text_parts.append(part.text)
272
+
273
+ except Exception as e:
274
+ logger.error(f"Error extracting content from response: {e}", exc_info=True)
275
+
276
+ logger.info(
277
+ f"Extraction complete: {len(images)} images, {len(text_parts)} text parts, {len(thoughts)} thoughts"
278
+ )
279
+ return {
280
+ "images": images,
281
+ "text": text_parts,
282
+ "thoughts": thoughts,
283
+ }
284
+
285
+ def _handle_exception(self, error: Exception) -> None:
286
+ """Handle exceptions from genai SDK."""
287
+ error_msg = str(error)
288
+
289
+ logger.error(f"API request failed: {error_msg}")
290
+
291
+ # Try to determine error type from message
292
+ if "authentication" in error_msg.lower() or "api key" in error_msg.lower():
293
+ raise AuthenticationError("Authentication failed. Please check your Gemini API key.")
294
+ elif "rate limit" in error_msg.lower() or "quota" in error_msg.lower():
295
+ raise RateLimitError("Rate limit exceeded. Please try again later.")
296
+ elif "safety" in error_msg.lower() or "blocked" in error_msg.lower():
297
+ raise ContentPolicyError(
298
+ "Content was blocked by safety filters. Please modify your prompt."
299
+ )
300
+
301
+ async def close(self) -> None:
302
+ """Close the Gemini client (genai SDK handles cleanup automatically)."""
303
+ # genai SDK doesn't require explicit cleanup
304
+ pass
@@ -0,0 +1,174 @@
1
+ """
2
+ Image service for Gemini 3 Pro Image API.
3
+ Provides interface for image generation using Gemini 3 Pro Image.
4
+ """
5
+
6
+ import base64
7
+ import logging
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from ..config.constants import GEMINI_MODELS
13
+ from ..core import sanitize_filename
14
+ from ..core.exceptions import ImageProcessingError
15
+ from .gemini_client import GeminiClient
16
+ from .prompt_enhancer import PromptEnhancer
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class ImageResult:
22
+ """Container for generated image data and metadata."""
23
+
24
+ def __init__(
25
+ self,
26
+ image_data: str,
27
+ prompt: str,
28
+ model: str,
29
+ index: int = 0,
30
+ metadata: dict[str, Any] | None = None,
31
+ ):
32
+ self.image_data = image_data # Base64-encoded
33
+ self.prompt = prompt
34
+ self.model = model
35
+ self.index = index
36
+ self.metadata = metadata or {}
37
+ self.timestamp = datetime.now()
38
+
39
+ def save(self, output_dir: Path, filename: str | None = None) -> Path:
40
+ """Save image to disk."""
41
+ if filename is None:
42
+ filename = self._generate_filename()
43
+
44
+ output_path = output_dir / filename
45
+
46
+ try:
47
+ # Decode base64 and save
48
+ image_bytes = base64.b64decode(self.image_data)
49
+ output_path.write_bytes(image_bytes)
50
+ logger.info(f"Saved image to {output_path}")
51
+ return output_path
52
+ except Exception as e:
53
+ raise ImageProcessingError(f"Failed to save image: {e}") from e
54
+
55
+ def _generate_filename(self) -> str:
56
+ """Generate clean, short filename."""
57
+ timestamp = self.timestamp.strftime("%Y%m%d_%H%M%S")
58
+ # Shorten model name
59
+ model_short = self.model.replace("gemini-3-pro-image-preview", "gemini3").replace(
60
+ "imagen-4-", "img4-"
61
+ )
62
+ # Sanitize and shorten prompt (max 30 chars)
63
+ prompt_snippet = sanitize_filename(self.prompt[:30])
64
+ index_str = f"_{self.index + 1}" if self.index > 0 else ""
65
+ return f"{model_short}_{timestamp}_{prompt_snippet}{index_str}.png"
66
+
67
+ def get_size(self) -> int:
68
+ """Get image size in bytes."""
69
+ return len(base64.b64decode(self.image_data))
70
+
71
+
72
class ImageService:
    """Image generation service backed by Gemini 3 Pro Image."""

    def __init__(self, api_key: str, *, enable_enhancement: bool = True, timeout: int = 60):
        """
        Set up the service and its Gemini client.

        Args:
            api_key: API key for Gemini API
            enable_enhancement: Enable automatic prompt enhancement
            timeout: Request timeout in seconds
        """
        self.api_key = api_key
        self.enable_enhancement = enable_enhancement
        self.timeout = timeout

        self.gemini_client = GeminiClient(api_key, timeout)

        # The enhancer shares this service's Gemini client and exists only
        # when enhancement is enabled.
        self.prompt_enhancer: PromptEnhancer | None = None
        if enable_enhancement:
            self.prompt_enhancer = PromptEnhancer(self.gemini_client)

    async def generate(
        self, prompt: str, *, model: str | None = None, enhance_prompt: bool = True, **kwargs: Any
    ) -> list[ImageResult]:
        """
        Generate images, optionally enhancing the prompt first.

        Args:
            prompt: Text prompt for image generation
            model: Model to use (default: gemini-3-pro-image-preview)
            enhance_prompt: Whether to enhance the prompt
            **kwargs: Additional parameters (aspect_ratio, reference_images, etc.)

        Returns:
            List of ImageResult objects

        Raises:
            ValueError: If the model is not a key of GEMINI_MODELS
        """
        model = "gemini-3-pro-image-preview" if model is None else model
        if model not in GEMINI_MODELS:
            raise ValueError(f"Unknown model: {model}. Only Gemini 3 Pro Image is supported.")

        original_prompt = prompt
        enhancement_context = self._build_enhancement_context(kwargs)

        should_enhance = enhance_prompt and self.enable_enhancement and self.prompt_enhancer
        if should_enhance:
            # An enhancement failure is logged and the current prompt is kept.
            try:
                result = await self.prompt_enhancer.enhance_prompt(
                    prompt, context=enhancement_context
                )
                prompt = result["enhanced_prompt"]
                logger.info(f"Prompt enhanced: {len(original_prompt)} -> {len(prompt)} chars")
            except Exception as e:
                logger.warning(f"Prompt enhancement failed: {e}")

        return await self._generate_with_gemini(prompt, model, original_prompt, kwargs)

    async def _generate_with_gemini(
        self, prompt: str, model: str, original_prompt: str, params: dict[str, Any]
    ) -> list[ImageResult]:
        """Call the Gemini client and wrap each returned image in an ImageResult."""
        response = await self.gemini_client.generate_image(prompt=prompt, model=model, **params)

        # Each result records the original prompt; the prompt actually sent
        # and the request parameters are kept in its metadata.
        return [
            ImageResult(
                image_data=image_data,
                prompt=original_prompt,
                model=model,
                index=position,
                metadata={"enhanced_prompt": prompt, "api": "gemini", **params},
            )
            for position, image_data in enumerate(response["images"])
        ]

    def _build_enhancement_context(self, params: dict[str, Any]) -> dict[str, Any]:
        """Derive the prompt-enhancement context from the request parameters."""
        context: dict[str, Any] = {}

        reference_images = params.get("reference_images")
        if reference_images:
            context["has_reference_images"] = True
            context["num_reference_images"] = len(reference_images)

        # Copied whenever the key is present, even if its value is None.
        if "aspect_ratio" in params:
            context["aspect_ratio"] = params["aspect_ratio"]

        if params.get("enable_google_search"):
            context["use_google_search"] = True

        return context

    async def close(self) -> None:
        """Close the underlying Gemini client."""
        await self.gemini_client.close()
@@ -0,0 +1,137 @@
1
+ """
2
+ Prompt enhancement service using Gemini Flash.
3
+ Automatically optimizes prompts for better image generation results.
4
+ """
5
+
6
+ import logging
7
+ from typing import Any
8
+
9
+ from .gemini_client import GeminiClient
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
# System instruction passed to the text model when enhancing image prompts.
# The text is sent verbatim, so edits here change what the model is told.
PROMPT_ENHANCEMENT_SYSTEM_INSTRUCTION = """You are an expert prompt engineer for AI image generation models. Your task is to enhance user prompts to produce the best possible results.

Follow these guidelines:
1. Preserve the user's core intent and subject matter
2. Add specific, professional details about:
- Composition (framing, perspective, angle)
- Lighting (type, quality, direction, mood)
- Materials and textures
- Atmosphere and mood
- Artistic style (if appropriate)
3. Use photographic and cinematic terminology when relevant
4. Be hyper-specific rather than generic
5. For portraits: describe features, expressions, clothing
6. For scenes: describe environment, weather, time of day
7. Keep prompts concise but detailed (aim for 100-300 words)
8. NEVER use hex color values (like #FF0000). Always describe colors using natural language (e.g., "dark red", "neon blue", "warm amber", "deep crimson")
9. Output ONLY the enhanced prompt, no explanations"""
31
+
32
+
33
+ class PromptEnhancer:
34
+ """Service for enhancing image generation prompts."""
35
+
36
+ def __init__(self, gemini_client: GeminiClient):
37
+ """
38
+ Initialize prompt enhancer.
39
+
40
+ Args:
41
+ gemini_client: Gemini client for text generation
42
+ """
43
+ self.gemini_client = gemini_client
44
+
45
+ async def enhance_prompt(
46
+ self,
47
+ original_prompt: str,
48
+ *,
49
+ context: dict[str, Any] | None = None,
50
+ ) -> dict[str, str]:
51
+ """
52
+ Enhance a prompt for better image generation.
53
+
54
+ Args:
55
+ original_prompt: Original user prompt
56
+ context: Optional context (features, image type, etc.)
57
+
58
+ Returns:
59
+ Dict with 'enhanced_prompt' and 'original_prompt'
60
+ """
61
+ # Build enhancement instruction
62
+ instruction = self._build_enhancement_instruction(original_prompt, context)
63
+
64
+ try:
65
+ enhanced = await self.gemini_client.generate_text(
66
+ prompt=instruction,
67
+ system_instruction=PROMPT_ENHANCEMENT_SYSTEM_INSTRUCTION,
68
+ model="gemini-flash-latest",
69
+ )
70
+
71
+ # Clean up the enhanced prompt
72
+ enhanced = enhanced.strip()
73
+
74
+ logger.info(f"Enhanced prompt: {len(original_prompt)} -> {len(enhanced)} chars")
75
+
76
+ return {
77
+ "original_prompt": original_prompt,
78
+ "enhanced_prompt": enhanced,
79
+ }
80
+
81
+ except Exception as e:
82
+ logger.warning(f"Prompt enhancement failed, using original: {e}")
83
+ return {
84
+ "original_prompt": original_prompt,
85
+ "enhanced_prompt": original_prompt,
86
+ }
87
+
88
+ def _build_enhancement_instruction(self, prompt: str, context: dict[str, Any] | None) -> str:
89
+ """Build the instruction for prompt enhancement."""
90
+ instruction_parts = [f"Enhance this image generation prompt:\n\n{prompt}"]
91
+
92
+ if context:
93
+ # Add context hints
94
+ if context.get("is_editing"):
95
+ instruction_parts.append("\nContext: This is for image editing/modification")
96
+
97
+ if context.get("maintain_character_consistency"):
98
+ instruction_parts.append(
99
+ "\nIMPORTANT: Describe the character with specific, consistent features "
100
+ "for use across multiple generations"
101
+ )
102
+
103
+ if context.get("blend_images"):
104
+ instruction_parts.append(
105
+ "\nContext: Multiple images will be blended. Describe how elements "
106
+ "should be composed naturally together"
107
+ )
108
+
109
+ if context.get("use_world_knowledge"):
110
+ instruction_parts.append(
111
+ "\nContext: Include accurate real-world details for historical figures, "
112
+ "landmarks, or factual scenarios"
113
+ )
114
+
115
+ if context.get("aspect_ratio"):
116
+ ratio = context["aspect_ratio"]
117
+ if ratio in ["16:9", "21:9"]:
118
+ instruction_parts.append("\nFormat: Wide landscape composition")
119
+ elif ratio in ["9:16", "2:3", "3:4"]:
120
+ instruction_parts.append("\nFormat: Vertical/portrait composition")
121
+
122
+ return "\n".join(instruction_parts)
123
+
124
+
125
async def create_prompt_enhancer(api_key: str, timeout: int = 30) -> PromptEnhancer:
    """
    Build a PromptEnhancer backed by its own, newly created Gemini client.

    Args:
        api_key: Gemini API key
        timeout: Request timeout

    Returns:
        PromptEnhancer instance
    """
    return PromptEnhancer(GeminiClient(api_key=api_key, timeout=timeout))
src/tools/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """Tools module for Ultimate Gemini MCP."""
2
+
3
+ from .batch_generate import batch_generate_images, register_batch_generate_tool
4
+ from .generate_image import generate_image_tool, register_generate_image_tool
5
+
6
+ __all__ = [
7
+ "generate_image_tool",
8
+ "register_generate_image_tool",
9
+ "batch_generate_images",
10
+ "register_batch_generate_tool",
11
+ ]