ultimate-gemini-mcp 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ultimate-gemini-mcp might be problematic. Click here for more details.
- src/__init__.py +16 -0
- src/config/__init__.py +32 -0
- src/config/constants.py +77 -0
- src/config/settings.py +143 -0
- src/core/__init__.py +55 -0
- src/core/exceptions.py +60 -0
- src/core/validation.py +161 -0
- src/server.py +166 -0
- src/services/__init__.py +15 -0
- src/services/gemini_client.py +230 -0
- src/services/image_service.py +243 -0
- src/services/imagen_client.py +175 -0
- src/services/prompt_enhancer.py +140 -0
- src/tools/__init__.py +11 -0
- src/tools/batch_generate.py +159 -0
- src/tools/generate_image.py +252 -0
- ultimate_gemini_mcp-1.0.1.dist-info/METADATA +372 -0
- ultimate_gemini_mcp-1.0.1.dist-info/RECORD +21 -0
- ultimate_gemini_mcp-1.0.1.dist-info/WHEEL +4 -0
- ultimate_gemini_mcp-1.0.1.dist-info/entry_points.txt +2 -0
- ultimate_gemini_mcp-1.0.1.dist-info/licenses/LICENSE +31 -0
src/server.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Ultimate Gemini MCP Server - Main Entry Point
|
|
4
|
+
|
|
5
|
+
Unified MCP server supporting:
|
|
6
|
+
- Gemini 2.5 Flash Image (with prompt enhancement and editing)
|
|
7
|
+
- Imagen 3, 4, and 4-Ultra (with advanced controls)
|
|
8
|
+
- Batch processing, prompt templates, and comprehensive features
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from fastmcp import FastMCP
|
|
16
|
+
|
|
17
|
+
from .config import ALL_MODELS, get_settings
|
|
18
|
+
from .tools import register_batch_generate_tool, register_generate_image_tool
|
|
19
|
+
|
|
20
|
+
# Logging configuration. Records are written to stderr, which is important
# for logging in MCP.
logging.basicConfig(
    stream=sys.stderr,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by the functions below.
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def create_app() -> FastMCP:
    """
    Build and configure the Ultimate Gemini MCP application.

    This is the factory function used by FastMCP CLI. It loads the settings,
    creates the FastMCP server, registers the image tools, and attaches two
    resources: ``models://list`` and ``settings://config``.

    Returns:
        The configured FastMCP server instance.

    Raises:
        Exception: Any error raised during initialization is logged with its
            traceback and re-raised unchanged.
    """
    import json

    def describe(name, description, features, **extra):
        # Key order is fixed here so the serialized JSON layout stays stable.
        return {
            "name": name,
            "description": description,
            "features": features,
            **extra,
        }

    logger.info("Initializing Ultimate Gemini MCP Server...")

    try:
        # Load settings (validates API key)
        settings = get_settings()

        logger.info(f"Output directory: {settings.output_dir}")
        logger.info(f"Prompt enhancement: {settings.api.enable_prompt_enhancement}")
        logger.info(f"Available models: {', '.join(ALL_MODELS.keys())}")

        mcp = FastMCP("Ultimate Gemini MCP", version="1.0.0")

        # Tools are registered before the resources, as in the original order.
        register_generate_image_tool(mcp)
        register_batch_generate_tool(mcp)

        @mcp.resource("models://list")
        def list_models() -> str:
            """List all available image generation models."""
            gemini_models = {
                "gemini-2.5-flash-image": describe(
                    "Gemini 2.5 Flash Image",
                    "Advanced image generation with editing and prompt enhancement",
                    [
                        "Prompt enhancement",
                        "Image editing",
                        "Character consistency",
                        "Multi-image blending",
                        "World knowledge integration",
                    ],
                    default=True,
                ),
            }
            imagen_models = {
                "imagen-4": describe(
                    "Imagen 4",
                    "High-quality image generation with improved text rendering",
                    [
                        "Enhanced quality",
                        "Better text rendering",
                        "Negative prompts",
                        "Seed-based reproducibility",
                        "Person generation controls",
                        "Advanced controls",
                    ],
                ),
                "imagen-4-fast": describe(
                    "Imagen 4 Fast",
                    "Optimized for faster generation while maintaining good quality",
                    [
                        "Faster generation speed",
                        "Good quality output",
                        "Negative prompts",
                        "Seed-based reproducibility",
                        "Person generation controls",
                        "Cost-effective",
                    ],
                ),
                "imagen-4-ultra": describe(
                    "Imagen 4 Ultra",
                    "Highest quality with best prompt adherence",
                    [
                        "Highest quality",
                        "Best prompt adherence",
                        "Professional results",
                        "Enhanced text rendering",
                        "Advanced controls",
                    ],
                ),
            }
            return json.dumps(
                {"gemini": gemini_models, "imagen": imagen_models},
                indent=2,
            )

        @mcp.resource("settings://config")
        def get_config() -> str:
            """Get current server configuration."""
            # Read the API settings at call time, not at registration time.
            api = settings.api
            snapshot = {
                "output_directory": str(settings.output_dir),
                "prompt_enhancement_enabled": api.enable_prompt_enhancement,
                "batch_processing_enabled": api.enable_batch_processing,
                "default_gemini_model": api.default_gemini_model,
                "default_imagen_model": api.default_imagen_model,
                "max_batch_size": api.max_batch_size,
                "request_timeout": api.request_timeout,
                "default_aspect_ratio": api.default_aspect_ratio,
                "default_output_format": api.default_output_format,
            }
            return json.dumps(snapshot, indent=2)

        logger.info("Ultimate Gemini MCP Server initialized successfully")
        return mcp

    except Exception as e:
        # Log at ERROR level with the traceback, then propagate to the caller.
        logger.exception(f"Failed to initialize server: {e}")
        raise
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def main() -> None:
    """
    Start the server when this module is executed directly.

    Builds the application through ``create_app`` and runs it. A
    ``KeyboardInterrupt`` ends the process with exit status 0; any other
    exception is logged with its traceback and ends it with exit status 1.
    """
    try:
        logger.info("Starting Ultimate Gemini MCP Server...")

        server = create_app()

        # FastMCP handles the stdio transport once run() is called.
        logger.info("Server is ready and listening for MCP requests")
        server.run()

    except KeyboardInterrupt:
        logger.info("Server shutdown requested by user")
        sys.exit(0)
    except Exception as e:
        logger.exception(f"Server error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
src/services/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Services module for Ultimate Gemini MCP."""
|
|
2
|
+
|
|
3
|
+
from .gemini_client import GeminiClient
|
|
4
|
+
from .imagen_client import ImagenClient
|
|
5
|
+
from .image_service import ImageResult, ImageService
|
|
6
|
+
from .prompt_enhancer import PromptEnhancer, create_prompt_enhancer
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"GeminiClient",
|
|
10
|
+
"ImagenClient",
|
|
11
|
+
"ImageService",
|
|
12
|
+
"ImageResult",
|
|
13
|
+
"PromptEnhancer",
|
|
14
|
+
"create_prompt_enhancer",
|
|
15
|
+
]
|
|
src/services/gemini_client.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gemini API client for Gemini 2.5 Flash Image generation.
|
|
3
|
+
Uses the generateContent API endpoint per Google's documentation.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import base64
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from ..config.constants import GEMINI_API_BASE, GEMINI_MODELS
|
|
13
|
+
from ..core.exceptions import (
|
|
14
|
+
APIError,
|
|
15
|
+
AuthenticationError,
|
|
16
|
+
ContentPolicyError,
|
|
17
|
+
RateLimitError,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GeminiClient:
|
|
24
|
+
"""Client for Gemini 2.5 Flash Image API."""
|
|
25
|
+
|
|
26
|
+
def __init__(self, api_key: str, timeout: int = 60):
|
|
27
|
+
"""
|
|
28
|
+
Initialize Gemini client.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
api_key: Gemini API key
|
|
32
|
+
timeout: Request timeout in seconds
|
|
33
|
+
"""
|
|
34
|
+
self.api_key = api_key
|
|
35
|
+
self.timeout = timeout
|
|
36
|
+
self.base_url = GEMINI_API_BASE
|
|
37
|
+
self.client = httpx.AsyncClient(timeout=timeout)
|
|
38
|
+
|
|
39
|
+
async def generate_image(
|
|
40
|
+
self,
|
|
41
|
+
prompt: str,
|
|
42
|
+
*,
|
|
43
|
+
model: str = "gemini-2.5-flash-image",
|
|
44
|
+
input_image: str | None = None,
|
|
45
|
+
aspect_ratio: str | None = None,
|
|
46
|
+
**kwargs: Any,
|
|
47
|
+
) -> dict[str, Any]:
|
|
48
|
+
"""
|
|
49
|
+
Generate or edit an image using Gemini 2.5 Flash Image.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
prompt: Text prompt for image generation or editing instruction
|
|
53
|
+
model: Model to use (default: gemini-2.5-flash-image)
|
|
54
|
+
input_image: Base64-encoded input image for editing (optional)
|
|
55
|
+
aspect_ratio: Desired aspect ratio (optional, influences output)
|
|
56
|
+
**kwargs: Additional parameters
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
Dict with 'images' key containing list of base64-encoded image data
|
|
60
|
+
|
|
61
|
+
Raises:
|
|
62
|
+
APIError: If the API request fails
|
|
63
|
+
"""
|
|
64
|
+
model_id = GEMINI_MODELS.get(model, model)
|
|
65
|
+
url = f"{self.base_url}/models/{model_id}:generateContent"
|
|
66
|
+
|
|
67
|
+
# Build request body according to doc.md
|
|
68
|
+
parts: list[dict[str, Any]] = []
|
|
69
|
+
|
|
70
|
+
# Add input image if provided (for editing)
|
|
71
|
+
if input_image:
|
|
72
|
+
parts.append({
|
|
73
|
+
"inline_data": {
|
|
74
|
+
"mime_type": "image/png",
|
|
75
|
+
"data": input_image
|
|
76
|
+
}
|
|
77
|
+
})
|
|
78
|
+
|
|
79
|
+
# Add text prompt (include aspect ratio hint if specified)
|
|
80
|
+
prompt_text = prompt
|
|
81
|
+
if aspect_ratio:
|
|
82
|
+
prompt_text = f"{prompt}. Aspect ratio: {aspect_ratio}"
|
|
83
|
+
|
|
84
|
+
parts.append({"text": prompt_text})
|
|
85
|
+
|
|
86
|
+
request_body = {"contents": [{"parts": parts}]}
|
|
87
|
+
|
|
88
|
+
headers = {
|
|
89
|
+
"x-goog-api-key": self.api_key,
|
|
90
|
+
"Content-Type": "application/json",
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
try:
|
|
94
|
+
logger.debug(f"Sending request to {url}")
|
|
95
|
+
response = await self.client.post(url, json=request_body, headers=headers)
|
|
96
|
+
response.raise_for_status()
|
|
97
|
+
data = response.json()
|
|
98
|
+
|
|
99
|
+
# Extract images from response
|
|
100
|
+
images = self._extract_images(data)
|
|
101
|
+
|
|
102
|
+
if not images:
|
|
103
|
+
raise APIError("No image data found in Gemini API response")
|
|
104
|
+
|
|
105
|
+
return {"images": images, "model": model, "response": data}
|
|
106
|
+
|
|
107
|
+
except httpx.HTTPStatusError as e:
|
|
108
|
+
self._handle_http_error(e)
|
|
109
|
+
except Exception as e:
|
|
110
|
+
logger.error(f"Gemini API request failed: {e}")
|
|
111
|
+
raise APIError(f"Gemini API request failed: {e}")
|
|
112
|
+
|
|
113
|
+
async def generate_text(
|
|
114
|
+
self,
|
|
115
|
+
prompt: str,
|
|
116
|
+
*,
|
|
117
|
+
model: str = "gemini-2.0-flash",
|
|
118
|
+
system_instruction: str | None = None,
|
|
119
|
+
) -> str:
|
|
120
|
+
"""
|
|
121
|
+
Generate text using Gemini (for prompt enhancement).
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
prompt: Text prompt
|
|
125
|
+
model: Model to use
|
|
126
|
+
system_instruction: Optional system instruction
|
|
127
|
+
|
|
128
|
+
Returns:
|
|
129
|
+
Generated text response
|
|
130
|
+
"""
|
|
131
|
+
model_id = GEMINI_MODELS.get(model, model)
|
|
132
|
+
url = f"{self.base_url}/models/{model_id}:generateContent"
|
|
133
|
+
|
|
134
|
+
request_body = {
|
|
135
|
+
"contents": [{"parts": [{"text": prompt}]}]
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
if system_instruction:
|
|
139
|
+
request_body["system_instruction"] = {
|
|
140
|
+
"parts": [{"text": system_instruction}]
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
headers = {
|
|
144
|
+
"x-goog-api-key": self.api_key,
|
|
145
|
+
"Content-Type": "application/json",
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
try:
|
|
149
|
+
response = await self.client.post(url, json=request_body, headers=headers)
|
|
150
|
+
response.raise_for_status()
|
|
151
|
+
data = response.json()
|
|
152
|
+
|
|
153
|
+
# Extract text from response
|
|
154
|
+
text = self._extract_text(data)
|
|
155
|
+
return text
|
|
156
|
+
|
|
157
|
+
except httpx.HTTPStatusError as e:
|
|
158
|
+
self._handle_http_error(e)
|
|
159
|
+
except Exception as e:
|
|
160
|
+
logger.error(f"Gemini text generation failed: {e}")
|
|
161
|
+
raise APIError(f"Gemini text generation failed: {e}")
|
|
162
|
+
|
|
163
|
+
def _extract_images(self, response_data: dict[str, Any]) -> list[str]:
|
|
164
|
+
"""Extract base64 image data from Gemini API response."""
|
|
165
|
+
images = []
|
|
166
|
+
|
|
167
|
+
try:
|
|
168
|
+
candidates = response_data.get("candidates", [])
|
|
169
|
+
for candidate in candidates:
|
|
170
|
+
content = candidate.get("content", {})
|
|
171
|
+
parts = content.get("parts", [])
|
|
172
|
+
for part in parts:
|
|
173
|
+
if "inline_data" in part:
|
|
174
|
+
image_data = part["inline_data"].get("data")
|
|
175
|
+
if image_data:
|
|
176
|
+
images.append(image_data)
|
|
177
|
+
except Exception as e:
|
|
178
|
+
logger.warning(f"Error extracting images from response: {e}")
|
|
179
|
+
|
|
180
|
+
return images
|
|
181
|
+
|
|
182
|
+
def _extract_text(self, response_data: dict[str, Any]) -> str:
|
|
183
|
+
"""Extract text from Gemini API response."""
|
|
184
|
+
try:
|
|
185
|
+
candidates = response_data.get("candidates", [])
|
|
186
|
+
if not candidates:
|
|
187
|
+
return ""
|
|
188
|
+
|
|
189
|
+
content = candidates[0].get("content", {})
|
|
190
|
+
parts = content.get("parts", [])
|
|
191
|
+
|
|
192
|
+
# Concatenate all text parts
|
|
193
|
+
text_parts = [part.get("text", "") for part in parts if "text" in part]
|
|
194
|
+
return "".join(text_parts)
|
|
195
|
+
|
|
196
|
+
except Exception as e:
|
|
197
|
+
logger.warning(f"Error extracting text from response: {e}")
|
|
198
|
+
return ""
|
|
199
|
+
|
|
200
|
+
def _handle_http_error(self, error: httpx.HTTPStatusError) -> None:
|
|
201
|
+
"""Handle HTTP errors and raise appropriate exceptions."""
|
|
202
|
+
status_code = error.response.status_code
|
|
203
|
+
error_text = error.response.text
|
|
204
|
+
|
|
205
|
+
logger.error(f"API request failed with status {status_code}: {error_text}")
|
|
206
|
+
|
|
207
|
+
if status_code == 401 or status_code == 403:
|
|
208
|
+
raise AuthenticationError(
|
|
209
|
+
"Authentication failed. Please check your Gemini API key.",
|
|
210
|
+
status_code=status_code
|
|
211
|
+
)
|
|
212
|
+
elif status_code == 429:
|
|
213
|
+
raise RateLimitError(
|
|
214
|
+
"Rate limit exceeded. Please try again later.",
|
|
215
|
+
status_code=status_code
|
|
216
|
+
)
|
|
217
|
+
elif status_code == 400 and ("SAFETY" in error_text.upper() or "BLOCKED" in error_text.upper()):
|
|
218
|
+
raise ContentPolicyError(
|
|
219
|
+
"Content was blocked by safety filters. Please modify your prompt.",
|
|
220
|
+
status_code=status_code
|
|
221
|
+
)
|
|
222
|
+
else:
|
|
223
|
+
raise APIError(
|
|
224
|
+
f"API request failed with status {status_code}: {error_text}",
|
|
225
|
+
status_code=status_code
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
async def close(self) -> None:
|
|
229
|
+
"""Close the HTTP client."""
|
|
230
|
+
await self.client.aclose()
|
|
src/services/image_service.py
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unified image service that orchestrates Gemini and Imagen APIs.
|
|
3
|
+
Provides a consistent interface for image generation regardless of the underlying model.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import base64
|
|
7
|
+
import logging
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from PIL import Image
|
|
13
|
+
|
|
14
|
+
from ..config.constants import GEMINI_MODELS, IMAGEN_MODELS
|
|
15
|
+
from ..core import sanitize_filename
|
|
16
|
+
from ..core.exceptions import ImageProcessingError
|
|
17
|
+
from .gemini_client import GeminiClient
|
|
18
|
+
from .imagen_client import ImagenClient
|
|
19
|
+
from .prompt_enhancer import PromptEnhancer
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ImageResult:
|
|
25
|
+
"""Container for generated image data and metadata."""
|
|
26
|
+
|
|
27
|
+
def __init__(
|
|
28
|
+
self,
|
|
29
|
+
image_data: str,
|
|
30
|
+
prompt: str,
|
|
31
|
+
model: str,
|
|
32
|
+
index: int = 0,
|
|
33
|
+
metadata: dict[str, Any] | None = None
|
|
34
|
+
):
|
|
35
|
+
self.image_data = image_data # Base64-encoded
|
|
36
|
+
self.prompt = prompt
|
|
37
|
+
self.model = model
|
|
38
|
+
self.index = index
|
|
39
|
+
self.metadata = metadata or {}
|
|
40
|
+
self.timestamp = datetime.now()
|
|
41
|
+
|
|
42
|
+
def save(self, output_dir: Path, filename: str | None = None) -> Path:
|
|
43
|
+
"""Save image to disk."""
|
|
44
|
+
if filename is None:
|
|
45
|
+
filename = self._generate_filename()
|
|
46
|
+
|
|
47
|
+
output_path = output_dir / filename
|
|
48
|
+
|
|
49
|
+
try:
|
|
50
|
+
# Decode base64 and save
|
|
51
|
+
image_bytes = base64.b64decode(self.image_data)
|
|
52
|
+
output_path.write_bytes(image_bytes)
|
|
53
|
+
logger.info(f"Saved image to {output_path}")
|
|
54
|
+
return output_path
|
|
55
|
+
except Exception as e:
|
|
56
|
+
raise ImageProcessingError(f"Failed to save image: {e}")
|
|
57
|
+
|
|
58
|
+
def _generate_filename(self) -> str:
|
|
59
|
+
"""Generate descriptive filename."""
|
|
60
|
+
timestamp = self.timestamp.strftime("%Y%m%d_%H%M%S")
|
|
61
|
+
# Sanitize prompt for filename
|
|
62
|
+
prompt_snippet = sanitize_filename(self.prompt[:50])
|
|
63
|
+
index_str = f"_{self.index + 1}" if self.index > 0 else ""
|
|
64
|
+
return f"{self.model}_{timestamp}_{prompt_snippet}{index_str}.png"
|
|
65
|
+
|
|
66
|
+
def get_size(self) -> int:
|
|
67
|
+
"""Get image size in bytes."""
|
|
68
|
+
return len(base64.b64decode(self.image_data))
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class ImageService:
|
|
72
|
+
"""Unified service for image generation using Gemini or Imagen."""
|
|
73
|
+
|
|
74
|
+
def __init__(
|
|
75
|
+
self,
|
|
76
|
+
api_key: str,
|
|
77
|
+
*,
|
|
78
|
+
enable_enhancement: bool = True,
|
|
79
|
+
timeout: int = 60
|
|
80
|
+
):
|
|
81
|
+
"""
|
|
82
|
+
Initialize image service.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
api_key: API key for Google AI services
|
|
86
|
+
enable_enhancement: Enable automatic prompt enhancement
|
|
87
|
+
timeout: Request timeout in seconds
|
|
88
|
+
"""
|
|
89
|
+
self.api_key = api_key
|
|
90
|
+
self.enable_enhancement = enable_enhancement
|
|
91
|
+
self.timeout = timeout
|
|
92
|
+
|
|
93
|
+
# Initialize clients
|
|
94
|
+
self.gemini_client = GeminiClient(api_key, timeout)
|
|
95
|
+
self.imagen_client = ImagenClient(api_key, timeout)
|
|
96
|
+
self.prompt_enhancer: PromptEnhancer | None = None
|
|
97
|
+
|
|
98
|
+
if enable_enhancement:
|
|
99
|
+
# Prompt enhancer uses the same Gemini client
|
|
100
|
+
self.prompt_enhancer = PromptEnhancer(self.gemini_client)
|
|
101
|
+
|
|
102
|
+
async def generate(
|
|
103
|
+
self,
|
|
104
|
+
prompt: str,
|
|
105
|
+
*,
|
|
106
|
+
model: str | None = None,
|
|
107
|
+
enhance_prompt: bool = True,
|
|
108
|
+
**kwargs: Any
|
|
109
|
+
) -> list[ImageResult]:
|
|
110
|
+
"""
|
|
111
|
+
Generate images using the appropriate API.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
prompt: Text prompt for image generation
|
|
115
|
+
model: Model to use (auto-detected if None)
|
|
116
|
+
enhance_prompt: Whether to enhance the prompt
|
|
117
|
+
**kwargs: Additional parameters (aspect_ratio, number_of_images, etc.)
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
List of ImageResult objects
|
|
121
|
+
"""
|
|
122
|
+
# Detect which API to use based on model
|
|
123
|
+
if model is None:
|
|
124
|
+
model = "gemini-2.5-flash-image" # Default to Gemini
|
|
125
|
+
|
|
126
|
+
is_gemini = model in GEMINI_MODELS
|
|
127
|
+
is_imagen = model in IMAGEN_MODELS
|
|
128
|
+
|
|
129
|
+
if not is_gemini and not is_imagen:
|
|
130
|
+
raise ValueError(f"Unknown model: {model}")
|
|
131
|
+
|
|
132
|
+
# Enhance prompt if enabled
|
|
133
|
+
original_prompt = prompt
|
|
134
|
+
enhancement_context = self._build_enhancement_context(kwargs)
|
|
135
|
+
|
|
136
|
+
if enhance_prompt and self.enable_enhancement and self.prompt_enhancer:
|
|
137
|
+
try:
|
|
138
|
+
result = await self.prompt_enhancer.enhance_prompt(
|
|
139
|
+
prompt,
|
|
140
|
+
context=enhancement_context
|
|
141
|
+
)
|
|
142
|
+
prompt = result["enhanced_prompt"]
|
|
143
|
+
logger.info(f"Prompt enhanced: {len(original_prompt)} -> {len(prompt)} chars")
|
|
144
|
+
except Exception as e:
|
|
145
|
+
logger.warning(f"Prompt enhancement failed: {e}")
|
|
146
|
+
|
|
147
|
+
# Generate images using appropriate API
|
|
148
|
+
if is_gemini:
|
|
149
|
+
return await self._generate_with_gemini(prompt, model, original_prompt, kwargs)
|
|
150
|
+
else:
|
|
151
|
+
return await self._generate_with_imagen(prompt, model, original_prompt, kwargs)
|
|
152
|
+
|
|
153
|
+
async def _generate_with_gemini(
|
|
154
|
+
self,
|
|
155
|
+
prompt: str,
|
|
156
|
+
model: str,
|
|
157
|
+
original_prompt: str,
|
|
158
|
+
params: dict[str, Any]
|
|
159
|
+
) -> list[ImageResult]:
|
|
160
|
+
"""Generate images using Gemini API."""
|
|
161
|
+
response = await self.gemini_client.generate_image(
|
|
162
|
+
prompt=prompt,
|
|
163
|
+
model=model,
|
|
164
|
+
**params
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
images = response["images"]
|
|
168
|
+
results = []
|
|
169
|
+
|
|
170
|
+
for i, image_data in enumerate(images):
|
|
171
|
+
result = ImageResult(
|
|
172
|
+
image_data=image_data,
|
|
173
|
+
prompt=original_prompt,
|
|
174
|
+
model=model,
|
|
175
|
+
index=i,
|
|
176
|
+
metadata={
|
|
177
|
+
"enhanced_prompt": prompt,
|
|
178
|
+
"api": "gemini",
|
|
179
|
+
**params
|
|
180
|
+
}
|
|
181
|
+
)
|
|
182
|
+
results.append(result)
|
|
183
|
+
|
|
184
|
+
return results
|
|
185
|
+
|
|
186
|
+
async def _generate_with_imagen(
|
|
187
|
+
self,
|
|
188
|
+
prompt: str,
|
|
189
|
+
model: str,
|
|
190
|
+
original_prompt: str,
|
|
191
|
+
params: dict[str, Any]
|
|
192
|
+
) -> list[ImageResult]:
|
|
193
|
+
"""Generate images using Imagen API."""
|
|
194
|
+
response = await self.imagen_client.generate_image(
|
|
195
|
+
prompt=prompt,
|
|
196
|
+
model=model,
|
|
197
|
+
**params
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
images = response["images"]
|
|
201
|
+
results = []
|
|
202
|
+
|
|
203
|
+
for i, image_data in enumerate(images):
|
|
204
|
+
result = ImageResult(
|
|
205
|
+
image_data=image_data,
|
|
206
|
+
prompt=original_prompt,
|
|
207
|
+
model=model,
|
|
208
|
+
index=i,
|
|
209
|
+
metadata={
|
|
210
|
+
"enhanced_prompt": prompt,
|
|
211
|
+
"api": "imagen",
|
|
212
|
+
**params
|
|
213
|
+
}
|
|
214
|
+
)
|
|
215
|
+
results.append(result)
|
|
216
|
+
|
|
217
|
+
return results
|
|
218
|
+
|
|
219
|
+
def _build_enhancement_context(self, params: dict[str, Any]) -> dict[str, Any]:
|
|
220
|
+
"""Build context for prompt enhancement."""
|
|
221
|
+
context = {}
|
|
222
|
+
|
|
223
|
+
if "input_image" in params:
|
|
224
|
+
context["is_editing"] = True
|
|
225
|
+
|
|
226
|
+
if params.get("maintainCharacterConsistency"):
|
|
227
|
+
context["maintain_character_consistency"] = True
|
|
228
|
+
|
|
229
|
+
if params.get("blendImages"):
|
|
230
|
+
context["blend_images"] = True
|
|
231
|
+
|
|
232
|
+
if params.get("useWorldKnowledge"):
|
|
233
|
+
context["use_world_knowledge"] = True
|
|
234
|
+
|
|
235
|
+
if "aspect_ratio" in params:
|
|
236
|
+
context["aspect_ratio"] = params["aspect_ratio"]
|
|
237
|
+
|
|
238
|
+
return context
|
|
239
|
+
|
|
240
|
+
async def close(self) -> None:
|
|
241
|
+
"""Close all clients."""
|
|
242
|
+
await self.gemini_client.close()
|
|
243
|
+
await self.imagen_client.close()
|