ultimate-gemini-mcp 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ultimate-gemini-mcp might be problematic. Click here for more details.

src/server.py ADDED
@@ -0,0 +1,166 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Ultimate Gemini MCP Server - Main Entry Point
4
+
5
+ Unified MCP server supporting:
6
+ - Gemini 2.5 Flash Image (with prompt enhancement and editing)
7
+ - Imagen 3, 4, and 4-Ultra (with advanced controls)
8
+ - Batch processing, prompt templates, and comprehensive features
9
+ """
10
+
11
+ import logging
12
+ import sys
13
+ from pathlib import Path
14
+
15
+ from fastmcp import FastMCP
16
+
17
+ from .config import ALL_MODELS, get_settings
18
+ from .tools import register_batch_generate_tool, register_generate_image_tool
19
+
20
# Logging configuration: every record goes to stderr (important for MCP).
logging.basicConfig(
    stream=sys.stderr,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
28
+
29
+
30
def create_app() -> FastMCP:
    """
    Build and configure the Ultimate Gemini MCP application.

    Factory function used by the FastMCP CLI. It loads the settings, creates
    the FastMCP server, registers the image tools and exposes two resources:

    - ``models://list``: JSON catalogue of the image generation models
    - ``settings://config``: JSON snapshot of the current configuration

    Returns:
        The configured FastMCP server.

    Raises:
        Exception: Any initialization error is logged with its traceback and
            re-raised unchanged.
    """
    # One local import shared by both resource handlers below.
    import json

    logger.info("Initializing Ultimate Gemini MCP Server...")

    try:
        # Load settings (validates API key)
        settings = get_settings()

        model_names = ', '.join(ALL_MODELS.keys())
        logger.info(f"Output directory: {settings.output_dir}")
        logger.info(f"Prompt enhancement: {settings.api.enable_prompt_enhancement}")
        logger.info(f"Available models: {model_names}")

        # Create FastMCP server
        mcp = FastMCP("Ultimate Gemini MCP", version="1.0.0")

        # Register tools
        for register_tool in (register_generate_image_tool, register_batch_generate_tool):
            register_tool(mcp)

        # Add resources
        @mcp.resource("models://list")
        def list_models() -> str:
            """List all available image generation models."""
            gemini_catalogue = {
                "gemini-2.5-flash-image": {
                    "name": "Gemini 2.5 Flash Image",
                    "description": "Advanced image generation with editing and prompt enhancement",
                    "features": [
                        "Prompt enhancement",
                        "Image editing",
                        "Character consistency",
                        "Multi-image blending",
                        "World knowledge integration",
                    ],
                    "default": True,
                },
            }

            imagen_catalogue = {
                "imagen-4": {
                    "name": "Imagen 4",
                    "description": "High-quality image generation with improved text rendering",
                    "features": [
                        "Enhanced quality",
                        "Better text rendering",
                        "Negative prompts",
                        "Seed-based reproducibility",
                        "Person generation controls",
                        "Advanced controls",
                    ],
                },
                "imagen-4-fast": {
                    "name": "Imagen 4 Fast",
                    "description": "Optimized for faster generation while maintaining good quality",
                    "features": [
                        "Faster generation speed",
                        "Good quality output",
                        "Negative prompts",
                        "Seed-based reproducibility",
                        "Person generation controls",
                        "Cost-effective",
                    ],
                },
                "imagen-4-ultra": {
                    "name": "Imagen 4 Ultra",
                    "description": "Highest quality with best prompt adherence",
                    "features": [
                        "Highest quality",
                        "Best prompt adherence",
                        "Professional results",
                        "Enhanced text rendering",
                        "Advanced controls",
                    ],
                },
            }

            # Key order is kept so the serialized JSON stays the same.
            return json.dumps(
                {"gemini": gemini_catalogue, "imagen": imagen_catalogue},
                indent=2,
            )

        @mcp.resource("settings://config")
        def get_config() -> str:
            """Get current server configuration."""
            # Values are read from ``settings`` on every request.
            snapshot = {
                "output_directory": str(settings.output_dir),
                "prompt_enhancement_enabled": settings.api.enable_prompt_enhancement,
                "batch_processing_enabled": settings.api.enable_batch_processing,
                "default_gemini_model": settings.api.default_gemini_model,
                "default_imagen_model": settings.api.default_imagen_model,
                "max_batch_size": settings.api.max_batch_size,
                "request_timeout": settings.api.request_timeout,
                "default_aspect_ratio": settings.api.default_aspect_ratio,
                "default_output_format": settings.api.default_output_format,
            }
            return json.dumps(snapshot, indent=2)

        logger.info("Ultimate Gemini MCP Server initialized successfully")
        return mcp

    except Exception as exc:
        # Log at ERROR level with the traceback, then let the caller decide.
        logger.exception(f"Failed to initialize server: {exc}")
        raise
143
+
144
+
145
def main() -> None:
    """
    Main entry point for direct execution.

    Creates the application and runs it. Exits with status 0 when the user
    interrupts the process and with status 1 on any other error.
    """
    try:
        logger.info("Starting Ultimate Gemini MCP Server...")
        server = create_app()

        # Run the server (FastMCP handles stdio transport)
        logger.info("Server is ready and listening for MCP requests")
        server.run()
    except KeyboardInterrupt:
        logger.info("Server shutdown requested by user")
        sys.exit(0)
    except Exception as exc:
        # Logged at ERROR level together with the traceback.
        logger.exception(f"Server error: {exc}")
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,15 @@
1
+ """Services module for Ultimate Gemini MCP."""
2
+
3
+ from .gemini_client import GeminiClient
4
+ from .imagen_client import ImagenClient
5
+ from .image_service import ImageResult, ImageService
6
+ from .prompt_enhancer import PromptEnhancer, create_prompt_enhancer
7
+
8
+ __all__ = [
9
+ "GeminiClient",
10
+ "ImagenClient",
11
+ "ImageService",
12
+ "ImageResult",
13
+ "PromptEnhancer",
14
+ "create_prompt_enhancer",
15
+ ]
@@ -0,0 +1,230 @@
1
+ """
2
+ Gemini API client for Gemini 2.5 Flash Image generation.
3
+ Uses the generateContent API endpoint per Google's documentation.
4
+ """
5
+
6
+ import base64
7
+ import logging
8
+ from typing import Any
9
+
10
+ import httpx
11
+
12
+ from ..config.constants import GEMINI_API_BASE, GEMINI_MODELS
13
+ from ..core.exceptions import (
14
+ APIError,
15
+ AuthenticationError,
16
+ ContentPolicyError,
17
+ RateLimitError,
18
+ )
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ class GeminiClient:
24
+ """Client for Gemini 2.5 Flash Image API."""
25
+
26
+ def __init__(self, api_key: str, timeout: int = 60):
27
+ """
28
+ Initialize Gemini client.
29
+
30
+ Args:
31
+ api_key: Gemini API key
32
+ timeout: Request timeout in seconds
33
+ """
34
+ self.api_key = api_key
35
+ self.timeout = timeout
36
+ self.base_url = GEMINI_API_BASE
37
+ self.client = httpx.AsyncClient(timeout=timeout)
38
+
39
+ async def generate_image(
40
+ self,
41
+ prompt: str,
42
+ *,
43
+ model: str = "gemini-2.5-flash-image",
44
+ input_image: str | None = None,
45
+ aspect_ratio: str | None = None,
46
+ **kwargs: Any,
47
+ ) -> dict[str, Any]:
48
+ """
49
+ Generate or edit an image using Gemini 2.5 Flash Image.
50
+
51
+ Args:
52
+ prompt: Text prompt for image generation or editing instruction
53
+ model: Model to use (default: gemini-2.5-flash-image)
54
+ input_image: Base64-encoded input image for editing (optional)
55
+ aspect_ratio: Desired aspect ratio (optional, influences output)
56
+ **kwargs: Additional parameters
57
+
58
+ Returns:
59
+ Dict with 'images' key containing list of base64-encoded image data
60
+
61
+ Raises:
62
+ APIError: If the API request fails
63
+ """
64
+ model_id = GEMINI_MODELS.get(model, model)
65
+ url = f"{self.base_url}/models/{model_id}:generateContent"
66
+
67
+ # Build request body according to doc.md
68
+ parts: list[dict[str, Any]] = []
69
+
70
+ # Add input image if provided (for editing)
71
+ if input_image:
72
+ parts.append({
73
+ "inline_data": {
74
+ "mime_type": "image/png",
75
+ "data": input_image
76
+ }
77
+ })
78
+
79
+ # Add text prompt (include aspect ratio hint if specified)
80
+ prompt_text = prompt
81
+ if aspect_ratio:
82
+ prompt_text = f"{prompt}. Aspect ratio: {aspect_ratio}"
83
+
84
+ parts.append({"text": prompt_text})
85
+
86
+ request_body = {"contents": [{"parts": parts}]}
87
+
88
+ headers = {
89
+ "x-goog-api-key": self.api_key,
90
+ "Content-Type": "application/json",
91
+ }
92
+
93
+ try:
94
+ logger.debug(f"Sending request to {url}")
95
+ response = await self.client.post(url, json=request_body, headers=headers)
96
+ response.raise_for_status()
97
+ data = response.json()
98
+
99
+ # Extract images from response
100
+ images = self._extract_images(data)
101
+
102
+ if not images:
103
+ raise APIError("No image data found in Gemini API response")
104
+
105
+ return {"images": images, "model": model, "response": data}
106
+
107
+ except httpx.HTTPStatusError as e:
108
+ self._handle_http_error(e)
109
+ except Exception as e:
110
+ logger.error(f"Gemini API request failed: {e}")
111
+ raise APIError(f"Gemini API request failed: {e}")
112
+
113
+ async def generate_text(
114
+ self,
115
+ prompt: str,
116
+ *,
117
+ model: str = "gemini-2.0-flash",
118
+ system_instruction: str | None = None,
119
+ ) -> str:
120
+ """
121
+ Generate text using Gemini (for prompt enhancement).
122
+
123
+ Args:
124
+ prompt: Text prompt
125
+ model: Model to use
126
+ system_instruction: Optional system instruction
127
+
128
+ Returns:
129
+ Generated text response
130
+ """
131
+ model_id = GEMINI_MODELS.get(model, model)
132
+ url = f"{self.base_url}/models/{model_id}:generateContent"
133
+
134
+ request_body = {
135
+ "contents": [{"parts": [{"text": prompt}]}]
136
+ }
137
+
138
+ if system_instruction:
139
+ request_body["system_instruction"] = {
140
+ "parts": [{"text": system_instruction}]
141
+ }
142
+
143
+ headers = {
144
+ "x-goog-api-key": self.api_key,
145
+ "Content-Type": "application/json",
146
+ }
147
+
148
+ try:
149
+ response = await self.client.post(url, json=request_body, headers=headers)
150
+ response.raise_for_status()
151
+ data = response.json()
152
+
153
+ # Extract text from response
154
+ text = self._extract_text(data)
155
+ return text
156
+
157
+ except httpx.HTTPStatusError as e:
158
+ self._handle_http_error(e)
159
+ except Exception as e:
160
+ logger.error(f"Gemini text generation failed: {e}")
161
+ raise APIError(f"Gemini text generation failed: {e}")
162
+
163
+ def _extract_images(self, response_data: dict[str, Any]) -> list[str]:
164
+ """Extract base64 image data from Gemini API response."""
165
+ images = []
166
+
167
+ try:
168
+ candidates = response_data.get("candidates", [])
169
+ for candidate in candidates:
170
+ content = candidate.get("content", {})
171
+ parts = content.get("parts", [])
172
+ for part in parts:
173
+ if "inline_data" in part:
174
+ image_data = part["inline_data"].get("data")
175
+ if image_data:
176
+ images.append(image_data)
177
+ except Exception as e:
178
+ logger.warning(f"Error extracting images from response: {e}")
179
+
180
+ return images
181
+
182
+ def _extract_text(self, response_data: dict[str, Any]) -> str:
183
+ """Extract text from Gemini API response."""
184
+ try:
185
+ candidates = response_data.get("candidates", [])
186
+ if not candidates:
187
+ return ""
188
+
189
+ content = candidates[0].get("content", {})
190
+ parts = content.get("parts", [])
191
+
192
+ # Concatenate all text parts
193
+ text_parts = [part.get("text", "") for part in parts if "text" in part]
194
+ return "".join(text_parts)
195
+
196
+ except Exception as e:
197
+ logger.warning(f"Error extracting text from response: {e}")
198
+ return ""
199
+
200
+ def _handle_http_error(self, error: httpx.HTTPStatusError) -> None:
201
+ """Handle HTTP errors and raise appropriate exceptions."""
202
+ status_code = error.response.status_code
203
+ error_text = error.response.text
204
+
205
+ logger.error(f"API request failed with status {status_code}: {error_text}")
206
+
207
+ if status_code == 401 or status_code == 403:
208
+ raise AuthenticationError(
209
+ "Authentication failed. Please check your Gemini API key.",
210
+ status_code=status_code
211
+ )
212
+ elif status_code == 429:
213
+ raise RateLimitError(
214
+ "Rate limit exceeded. Please try again later.",
215
+ status_code=status_code
216
+ )
217
+ elif status_code == 400 and ("SAFETY" in error_text.upper() or "BLOCKED" in error_text.upper()):
218
+ raise ContentPolicyError(
219
+ "Content was blocked by safety filters. Please modify your prompt.",
220
+ status_code=status_code
221
+ )
222
+ else:
223
+ raise APIError(
224
+ f"API request failed with status {status_code}: {error_text}",
225
+ status_code=status_code
226
+ )
227
+
228
+ async def close(self) -> None:
229
+ """Close the HTTP client."""
230
+ await self.client.aclose()
@@ -0,0 +1,243 @@
1
+ """
2
+ Unified image service that orchestrates Gemini and Imagen APIs.
3
+ Provides a consistent interface for image generation regardless of the underlying model.
4
+ """
5
+
6
+ import base64
7
+ import logging
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from PIL import Image
13
+
14
+ from ..config.constants import GEMINI_MODELS, IMAGEN_MODELS
15
+ from ..core import sanitize_filename
16
+ from ..core.exceptions import ImageProcessingError
17
+ from .gemini_client import GeminiClient
18
+ from .imagen_client import ImagenClient
19
+ from .prompt_enhancer import PromptEnhancer
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
+ class ImageResult:
25
+ """Container for generated image data and metadata."""
26
+
27
+ def __init__(
28
+ self,
29
+ image_data: str,
30
+ prompt: str,
31
+ model: str,
32
+ index: int = 0,
33
+ metadata: dict[str, Any] | None = None
34
+ ):
35
+ self.image_data = image_data # Base64-encoded
36
+ self.prompt = prompt
37
+ self.model = model
38
+ self.index = index
39
+ self.metadata = metadata or {}
40
+ self.timestamp = datetime.now()
41
+
42
+ def save(self, output_dir: Path, filename: str | None = None) -> Path:
43
+ """Save image to disk."""
44
+ if filename is None:
45
+ filename = self._generate_filename()
46
+
47
+ output_path = output_dir / filename
48
+
49
+ try:
50
+ # Decode base64 and save
51
+ image_bytes = base64.b64decode(self.image_data)
52
+ output_path.write_bytes(image_bytes)
53
+ logger.info(f"Saved image to {output_path}")
54
+ return output_path
55
+ except Exception as e:
56
+ raise ImageProcessingError(f"Failed to save image: {e}")
57
+
58
+ def _generate_filename(self) -> str:
59
+ """Generate descriptive filename."""
60
+ timestamp = self.timestamp.strftime("%Y%m%d_%H%M%S")
61
+ # Sanitize prompt for filename
62
+ prompt_snippet = sanitize_filename(self.prompt[:50])
63
+ index_str = f"_{self.index + 1}" if self.index > 0 else ""
64
+ return f"{self.model}_{timestamp}_{prompt_snippet}{index_str}.png"
65
+
66
+ def get_size(self) -> int:
67
+ """Get image size in bytes."""
68
+ return len(base64.b64decode(self.image_data))
69
+
70
+
71
+ class ImageService:
72
+ """Unified service for image generation using Gemini or Imagen."""
73
+
74
+ def __init__(
75
+ self,
76
+ api_key: str,
77
+ *,
78
+ enable_enhancement: bool = True,
79
+ timeout: int = 60
80
+ ):
81
+ """
82
+ Initialize image service.
83
+
84
+ Args:
85
+ api_key: API key for Google AI services
86
+ enable_enhancement: Enable automatic prompt enhancement
87
+ timeout: Request timeout in seconds
88
+ """
89
+ self.api_key = api_key
90
+ self.enable_enhancement = enable_enhancement
91
+ self.timeout = timeout
92
+
93
+ # Initialize clients
94
+ self.gemini_client = GeminiClient(api_key, timeout)
95
+ self.imagen_client = ImagenClient(api_key, timeout)
96
+ self.prompt_enhancer: PromptEnhancer | None = None
97
+
98
+ if enable_enhancement:
99
+ # Prompt enhancer uses the same Gemini client
100
+ self.prompt_enhancer = PromptEnhancer(self.gemini_client)
101
+
102
+ async def generate(
103
+ self,
104
+ prompt: str,
105
+ *,
106
+ model: str | None = None,
107
+ enhance_prompt: bool = True,
108
+ **kwargs: Any
109
+ ) -> list[ImageResult]:
110
+ """
111
+ Generate images using the appropriate API.
112
+
113
+ Args:
114
+ prompt: Text prompt for image generation
115
+ model: Model to use (auto-detected if None)
116
+ enhance_prompt: Whether to enhance the prompt
117
+ **kwargs: Additional parameters (aspect_ratio, number_of_images, etc.)
118
+
119
+ Returns:
120
+ List of ImageResult objects
121
+ """
122
+ # Detect which API to use based on model
123
+ if model is None:
124
+ model = "gemini-2.5-flash-image" # Default to Gemini
125
+
126
+ is_gemini = model in GEMINI_MODELS
127
+ is_imagen = model in IMAGEN_MODELS
128
+
129
+ if not is_gemini and not is_imagen:
130
+ raise ValueError(f"Unknown model: {model}")
131
+
132
+ # Enhance prompt if enabled
133
+ original_prompt = prompt
134
+ enhancement_context = self._build_enhancement_context(kwargs)
135
+
136
+ if enhance_prompt and self.enable_enhancement and self.prompt_enhancer:
137
+ try:
138
+ result = await self.prompt_enhancer.enhance_prompt(
139
+ prompt,
140
+ context=enhancement_context
141
+ )
142
+ prompt = result["enhanced_prompt"]
143
+ logger.info(f"Prompt enhanced: {len(original_prompt)} -> {len(prompt)} chars")
144
+ except Exception as e:
145
+ logger.warning(f"Prompt enhancement failed: {e}")
146
+
147
+ # Generate images using appropriate API
148
+ if is_gemini:
149
+ return await self._generate_with_gemini(prompt, model, original_prompt, kwargs)
150
+ else:
151
+ return await self._generate_with_imagen(prompt, model, original_prompt, kwargs)
152
+
153
+ async def _generate_with_gemini(
154
+ self,
155
+ prompt: str,
156
+ model: str,
157
+ original_prompt: str,
158
+ params: dict[str, Any]
159
+ ) -> list[ImageResult]:
160
+ """Generate images using Gemini API."""
161
+ response = await self.gemini_client.generate_image(
162
+ prompt=prompt,
163
+ model=model,
164
+ **params
165
+ )
166
+
167
+ images = response["images"]
168
+ results = []
169
+
170
+ for i, image_data in enumerate(images):
171
+ result = ImageResult(
172
+ image_data=image_data,
173
+ prompt=original_prompt,
174
+ model=model,
175
+ index=i,
176
+ metadata={
177
+ "enhanced_prompt": prompt,
178
+ "api": "gemini",
179
+ **params
180
+ }
181
+ )
182
+ results.append(result)
183
+
184
+ return results
185
+
186
+ async def _generate_with_imagen(
187
+ self,
188
+ prompt: str,
189
+ model: str,
190
+ original_prompt: str,
191
+ params: dict[str, Any]
192
+ ) -> list[ImageResult]:
193
+ """Generate images using Imagen API."""
194
+ response = await self.imagen_client.generate_image(
195
+ prompt=prompt,
196
+ model=model,
197
+ **params
198
+ )
199
+
200
+ images = response["images"]
201
+ results = []
202
+
203
+ for i, image_data in enumerate(images):
204
+ result = ImageResult(
205
+ image_data=image_data,
206
+ prompt=original_prompt,
207
+ model=model,
208
+ index=i,
209
+ metadata={
210
+ "enhanced_prompt": prompt,
211
+ "api": "imagen",
212
+ **params
213
+ }
214
+ )
215
+ results.append(result)
216
+
217
+ return results
218
+
219
+ def _build_enhancement_context(self, params: dict[str, Any]) -> dict[str, Any]:
220
+ """Build context for prompt enhancement."""
221
+ context = {}
222
+
223
+ if "input_image" in params:
224
+ context["is_editing"] = True
225
+
226
+ if params.get("maintainCharacterConsistency"):
227
+ context["maintain_character_consistency"] = True
228
+
229
+ if params.get("blendImages"):
230
+ context["blend_images"] = True
231
+
232
+ if params.get("useWorldKnowledge"):
233
+ context["use_world_knowledge"] = True
234
+
235
+ if "aspect_ratio" in params:
236
+ context["aspect_ratio"] = params["aspect_ratio"]
237
+
238
+ return context
239
+
240
+ async def close(self) -> None:
241
+ """Close all clients."""
242
+ await self.gemini_client.close()
243
+ await self.imagen_client.close()