tena 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tena/__init__.py +3 -0
- tena/image.py +990 -0
- tena/main.py +168 -0
- tena-0.1.0.dist-info/METADATA +227 -0
- tena-0.1.0.dist-info/RECORD +7 -0
- tena-0.1.0.dist-info/WHEEL +4 -0
- tena-0.1.0.dist-info/entry_points.txt +2 -0
tena/__init__.py
ADDED
tena/image.py
ADDED
|
@@ -0,0 +1,990 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import base64
|
|
5
|
+
from contextlib import ExitStack
|
|
6
|
+
import hashlib
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import mimetypes
|
|
10
|
+
import os
|
|
11
|
+
import time
|
|
12
|
+
from collections.abc import Mapping, Sequence
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Literal
|
|
16
|
+
import urllib.parse
|
|
17
|
+
import urllib.request
|
|
18
|
+
|
|
19
|
+
from dotenv import load_dotenv
|
|
20
|
+
from google import genai
|
|
21
|
+
from google.genai import types
|
|
22
|
+
from google.genai.types import HttpOptions
|
|
23
|
+
import httpx
|
|
24
|
+
from openai import AsyncOpenAI
|
|
25
|
+
|
|
26
|
+
# Closed set of backend identifiers that an ImageModel may name in its
# ``client`` field; ``draw`` dispatches on these exact strings.
ClientKind = Literal[
    "openai", "openrouter", "gemini-interactions", "gemini-generate-content"
]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class GeneratedImage:
    """Immutable pairing of one image payload with its MIME type."""

    # Raw image bytes; the backend helpers in this module store the
    # base64-decoded response payload here.
    data: bytes
    # MIME type string that accompanies ``data``.
    mime_type: str
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class ImageModel:
    """Immutable registry entry describing how to reach one image model."""

    # Human-readable label; used in this module's error messages.
    model_displayname: str
    # Model identifier that is sent to the backend API.
    model_realname: str
    # Which backend integration handles requests for this model.
    client: ClientKind
    # Name of the environment variable holding the API key.
    api_key_env: str
    # Optional endpoint override; None leaves the choice to the backend helper.
    base_url: str | None = None
    # MIME type assumed when a response does not state one.
    default_mime_type: str = "image/png"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
|
|
52
|
+
class _ResolvedInputImage:
|
|
53
|
+
path: Path
|
|
54
|
+
mime_type: str
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Registry of supported models, keyed by the ``model_path`` value that callers
# pass to ``draw``. Each entry states the backend client, the environment
# variable holding the API key, and (optionally) a base URL override.
IMAGE_MODELS: Mapping[str, ImageModel] = {
    # --- GPT Image 2 via different providers -------------------------------
    "openrouter/gpt-image-2": ImageModel(
        model_displayname="GPT Image 2 (OpenRouter)",
        model_realname="openai/gpt-image-2",
        client="openrouter",
        api_key_env="OPENROUTER_API_KEY",
        base_url="https://openrouter.ai/api/v1",
    ),
    "zenmux/gpt-image-2": ImageModel(
        model_displayname="GPT Image 2 (Zenmux)",
        model_realname="openai/gpt-image-2",
        client="openai",
        api_key_env="ZENMUX_API_KEY",
        base_url="https://zenmux.ai/api/v1",
    ),
    "302ai/gpt-image-2": ImageModel(
        model_displayname="GPT Image 2 (302AI)",
        model_realname="gpt-image-2",
        client="openai",
        api_key_env="AI302_API_KEY",
        base_url="https://api.302.ai/v1",
    ),
    # --- Gemini 3.1 Flash Image --------------------------------------------
    "zenmux/gemini-3.1-flash-image": ImageModel(
        model_displayname="Gemini-3.1-Flash-Image (Zenmux)",
        model_realname="google/gemini-3.1-flash-image",
        client="gemini-generate-content",
        api_key_env="ZENMUX_API_KEY",
        base_url="https://zenmux.ai/api/vertex-ai",
    ),
    # No base_url here: the Gemini client is created without HttpOptions.
    "google/gemini-3.1-flash-image": ImageModel(
        model_displayname="Gemini-3.1-Flash-Image (Google)",
        model_realname="gemini-3.1-flash-image",
        client="gemini-interactions",
        api_key_env="GEMINI_API_KEY",
    ),
}
|
|
93
|
+
|
|
94
|
+
# Preferred file suffix for each MIME type this module knows by name.
IMAGE_SUFFIXES: Mapping[str, str] = {
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/webp": ".webp",
}

# Directory under which downloaded URL input images are cached.
URL_INPUT_IMAGE_CACHE_DIR = Path("/tmp/tena/input_images")
# Timeout (seconds) for downloading a URL input image; also reused as the
# connect timeout of OpenRouter requests.
URL_DOWNLOAD_TIMEOUT_SECONDS = 30
# Overall timeout (seconds) applied to OpenRouter image requests.
OPENROUTER_REQUEST_TIMEOUT_SECONDS = 300
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def suffix_for_mime_type(mime_type: str) -> str:
    """Return the suffix registered in ``IMAGE_SUFFIXES`` for *mime_type*.

    Unknown MIME types map to ``".bin"``.
    """
    try:
        return IMAGE_SUFFIXES[mime_type]
    except KeyError:
        return ".bin"
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
async def draw(
    *,
    model_path: str,
    prompt: str,
    size: str = "auto",
    number: int = 1,
    input_images: Sequence[str] | None = None,
    web_search: bool | Mapping[str, Any] = False,
) -> list[GeneratedImage]:
    """Generate images with one of the models registered in ``IMAGE_MODELS``.

    How ``size`` is interpreted depends on the client behind the model:

    - OpenAI-compatible clients forward it unchanged as the API ``size``.
    - OpenRouter forwards it unchanged as the API ``size``.
    - Gemini clients parse it into their image response config and accept
      values such as ``16:9``, ``2K`` and ``16:9@2K``.

    Args:
        model_path: Key into ``IMAGE_MODELS``.
        prompt: Text prompt forwarded to the backend.
        size: Client-specific size specification (see above).
        number: How many images to request; must be at least 1.
        input_images: Optional local paths or http(s) URLs of reference images.
        web_search: Must be ``False``; any other value is rejected.

    Returns:
        The images produced by the selected backend helper.

    Raises:
        ValueError: If ``number`` is below 1, or the model's client is not one
            of the supported kinds.
        NotImplementedError: If ``web_search`` is anything other than ``False``.
    """
    if number < 1:
        logger.debug("Rejecting image request with invalid number=%s", number)
        raise ValueError("number must be greater than or equal to 1")
    if web_search is not False:
        logger.debug("Rejecting image request because web_search is not implemented")
        raise NotImplementedError("web_search is not implemented yet")

    started_at = time.perf_counter()
    model = _get_image_model(model_path)
    reference_count = len(input_images or ())
    logger.info(
        "Starting image generation: model_path=%s model=%s client=%s size=%s "
        "number=%s input_images=%s prompt_length=%s",
        model_path,
        model.model_realname,
        model.client,
        size,
        number,
        reference_count,
        len(prompt),
    )
    api_key = _get_api_key(model)
    # NOTE(review): this call is synchronous inside a coroutine; URL inputs
    # appear to be downloaded here, which would block the event loop - confirm.
    references = _resolve_input_images(input_images)

    # Every backend helper shares one keyword-only signature, so the client
    # kind can select the coroutine function from a table.
    backends = {
        "openai": _draw_with_openai,
        "openrouter": _draw_with_openrouter,
        "gemini-interactions": _draw_with_gemini_interactions,
        "gemini-generate-content": _draw_with_gemini_generate_content,
    }
    backend = backends.get(model.client)
    if backend is None:
        logger.debug("Unsupported image client resolved: %s", model.client)
        raise ValueError(f"Unsupported client: {model.client}")

    images = await backend(
        model=model,
        api_key=api_key,
        prompt=prompt,
        size=size,
        number=number,
        input_images=references,
    )

    duration = time.perf_counter() - started_at
    logger.info(
        "Finished image generation: model_path=%s images=%s total_bytes=%s "
        "elapsed=%.2fs",
        model_path,
        len(images),
        _total_image_bytes(images),
        duration,
    )
    return images
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _get_image_model(model_path: str) -> ImageModel:
    """Look up *model_path* in ``IMAGE_MODELS``.

    Raises:
        ValueError: If the key is not registered; the message lists every
            registered key in sorted order.
    """
    entry = IMAGE_MODELS.get(model_path)
    if entry is None:
        known_paths = ", ".join(sorted(IMAGE_MODELS))
        logger.debug("Unknown image model requested: model_path=%s", model_path)
        raise ValueError(
            f"Unknown model_path: {model_path}. Available models: {known_paths}"
        )

    logger.debug(
        "Resolved image model: model_path=%s display_name=%s client=%s",
        model_path,
        entry.model_displayname,
        entry.client,
    )
    return entry
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _get_api_key(model: ImageModel) -> str:
    """Return the API key for *model* from the environment.

    The project ``.env`` file is loaded first (without overriding variables
    that are already set), then ``model.api_key_env`` is read.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    _load_project_dotenv()

    env_name = model.api_key_env
    key = os.environ.get(env_name)
    if not key:
        logger.debug("Missing API key environment variable: env=%s", env_name)
        raise RuntimeError(
            f"Missing API key environment variable {model.api_key_env} for "
            f"{model.model_displayname}"
        )

    logger.debug("Resolved API key from environment: env=%s", env_name)
    return key
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _load_project_dotenv() -> None:
    """Load ``.env`` from the directory that contains this package.

    Values already present in the environment are kept (``override=False``).
    """
    # parents[1] is the directory holding the package directory.
    # NOTE(review): for an installed wheel this is presumably site-packages,
    # not a project checkout - confirm that is intended.
    dotenv_file = Path(__file__).resolve().parents[1] / ".env"
    logger.debug("Loading dotenv file if present: path=%s", dotenv_file)
    load_dotenv(dotenv_path=dotenv_file, override=False)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _resolve_input_images(
    input_images: Sequence[str] | None,
) -> list[_ResolvedInputImage]:
    """Turn user-supplied image references into resolved local images.

    Each entry is stripped of surrounding whitespace. http(s) URLs go through
    the URL resolver; everything else is treated as a local path.

    Raises:
        ValueError: If an entry is empty after stripping.
    """
    if not input_images:
        logger.debug("No input images provided")
        return []

    resolved: list[_ResolvedInputImage] = []
    for position, raw_source in enumerate(input_images, start=1):
        source = raw_source.strip()
        if not source:
            logger.debug("Rejecting empty input image value: index=%s", position)
            raise ValueError("input_images cannot contain empty values")

        if _is_http_url(source):
            # The URL resolver emits its own log records.
            resolved.append(_resolve_url_input_image(source))
            continue

        local_image = _resolve_local_input_image(source)
        resolved.append(local_image)
        logger.debug(
            "Resolved local input image: index=%s filename=%s mime_type=%s "
            "bytes=%s",
            position,
            local_image.path.name,
            local_image.mime_type,
            _file_size_or_none(local_image.path),
        )

    logger.debug("Resolved input images: count=%s", len(resolved))

    return resolved
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _is_http_url(value: str) -> bool:
|
|
273
|
+
parsed = urllib.parse.urlparse(value)
|
|
274
|
+
return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _resolve_local_input_image(source: str) -> _ResolvedInputImage:
    """Validate a local image path and pair it with an inferred MIME type.

    ``~`` in *source* is expanded before any check.

    Raises:
        FileNotFoundError: If the path does not exist.
        ValueError: If the path is not a regular file, or no image MIME type
            could be inferred for it.
    """
    candidate = Path(source).expanduser()
    if not candidate.exists():
        logger.debug("Local input image does not exist: filename=%s", candidate.name)
        raise FileNotFoundError(f"Input image does not exist: {candidate}")
    if not candidate.is_file():
        logger.debug("Local input image is not a file: filename=%s", candidate.name)
        raise ValueError(f"Input image is not a file: {candidate}")

    detected = _infer_image_mime_type(path=candidate)
    if detected is not None:
        return _ResolvedInputImage(path=candidate, mime_type=detected)

    logger.debug(
        "Could not infer local input image MIME type: filename=%s", candidate.name
    )
    raise ValueError(f"Could not infer image MIME type for: {candidate}")
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _resolve_url_input_image(url: str) -> _ResolvedInputImage:
    """Return a local copy of the image at *url*, downloading it if not cached.

    The cache directory is ``URL_INPUT_IMAGE_CACHE_DIR/<sha256 of url>`` and
    holds the image file plus a ``metadata.json`` describing it. A usable
    cache entry is returned without touching the network.

    Raises:
        ValueError: If no image MIME type can be inferred for the download.
    """
    url_hash = _hash_url(url)
    parsed_url = urllib.parse.urlparse(url)
    host = parsed_url.netloc
    cache_dir = URL_INPUT_IMAGE_CACHE_DIR / hashlib.sha256(url.encode()).hexdigest()
    metadata_path = cache_dir / "metadata.json"

    cache_hit = _read_cached_url_input_image(url, metadata_path)
    if cache_hit is not None:
        logger.info(
            "Using cached URL input image: url_hash=%s host=%s mime_type=%s bytes=%s",
            url_hash,
            host,
            cache_hit.mime_type,
            _file_size_or_none(cache_hit.path),
        )
        return cache_hit

    logger.info("Downloading URL input image: url_hash=%s host=%s", url_hash, host)
    cache_dir.mkdir(parents=True, exist_ok=True)
    download = urllib.request.Request(url, headers={"User-Agent": "tena/0.1.0"})
    with urllib.request.urlopen(
        download, timeout=URL_DOWNLOAD_TIMEOUT_SECONDS
    ) as response:
        # NOTE(review): the whole body is read into memory with no size cap.
        payload = response.read()
        declared = response.headers.get_content_type()

    mime_type = _infer_image_mime_type(
        path=Path(parsed_url.path),
        data=payload,
        declared_mime_type=declared,
    )
    if mime_type is None:
        logger.debug(
            "URL input image MIME type inference failed: url_hash=%s host=%s "
            "declared_mime_type=%s bytes=%s",
            url_hash,
            host,
            declared,
            len(payload),
        )
        raise ValueError(f"URL did not return a supported image: {url}")

    suffix = _suffix_for_mime_type(mime_type)
    image_path = cache_dir / f"source{suffix}"
    # Write to a sibling temp file and rename it over the final name so the
    # image file never appears half-written under that name.
    staging_path = cache_dir / f".source{suffix}.tmp"
    staging_path.write_bytes(payload)
    staging_path.replace(image_path)

    # The metadata is written after the image it describes.
    metadata_json = json.dumps(
        {"url": url, "filename": image_path.name, "mime_type": mime_type},
        indent=2,
        sort_keys=True,
    )
    metadata_path.write_text(metadata_json, encoding="utf-8")

    logger.info(
        "Downloaded URL input image: url_hash=%s host=%s mime_type=%s bytes=%s",
        url_hash,
        host,
        mime_type,
        len(payload),
    )
    return _ResolvedInputImage(path=image_path, mime_type=mime_type)
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _read_cached_url_input_image(
    url: str,
    metadata_path: Path,
) -> _ResolvedInputImage | None:
    """Return the cached image described by *metadata_path*, if it is usable.

    Every problem with the cache entry (missing, unreadable, malformed or
    inconsistent metadata, missing image file) is treated as a cache miss and
    reported as ``None``; this function never raises for a bad cache entry.

    Args:
        url: The URL the caller wants; must equal the URL stored in metadata.
        metadata_path: Path of the entry's ``metadata.json``.

    Returns:
        The resolved cached image, or ``None`` on a cache miss.
    """
    url_hash = _hash_url(url)
    if not metadata_path.exists():
        logger.debug("URL input image cache metadata missing: url_hash=%s", url_hash)
        return None

    try:
        metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
    except OSError:
        logger.debug(
            "Could not read URL input image cache metadata: url_hash=%s",
            url_hash,
            exc_info=True,
        )
        return None
    except ValueError:
        # Covers json.JSONDecodeError and the UnicodeDecodeError that
        # read_text raises for non-UTF-8 content; both subclass ValueError.
        logger.debug(
            "Invalid URL input image cache metadata JSON: url_hash=%s",
            url_hash,
            exc_info=True,
        )
        return None

    # Valid JSON need not be an object (it could be a list or a number);
    # without this guard the .get() calls below raise AttributeError.
    if not isinstance(metadata, Mapping):
        logger.debug("URL input image cache metadata invalid: url_hash=%s", url_hash)
        return None

    if metadata.get("url") != url:
        logger.debug(
            "URL input image cache metadata URL mismatch: url_hash=%s", url_hash
        )
        return None

    filename = metadata.get("filename")
    mime_type = metadata.get("mime_type")
    if not isinstance(filename, str) or not _is_image_mime_type(mime_type):
        logger.debug("URL input image cache metadata invalid: url_hash=%s", url_hash)
        return None

    # The metadata file is on-disk data and must not steer reads outside the
    # cache entry: accept a bare file name only (no separators, not absolute).
    if Path(filename).name != filename:
        logger.debug("URL input image cache metadata invalid: url_hash=%s", url_hash)
        return None

    image_path = metadata_path.parent / filename
    if not image_path.is_file():
        logger.debug("URL input image cache file missing: url_hash=%s", url_hash)
        return None

    return _ResolvedInputImage(path=image_path, mime_type=mime_type)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _infer_image_mime_type(
    *,
    path: Path,
    data: bytes | None = None,
    declared_mime_type: str | None = None,
) -> str | None:
    """Infer an image MIME type, trying the cheapest source first.

    Order of precedence:

    1. *declared_mime_type*, when it is an ``image/*`` string.
    2. A guess from the file name of *path*.
    3. Magic-byte detection on *data*, or on the first bytes of the file at
       *path* when *data* is not supplied.

    Args:
        path: Path whose name (and, as a last resort, content) is inspected.
        data: Optional in-memory content to sniff instead of reading *path*.
        declared_mime_type: Optional externally declared MIME type.

    Returns:
        The inferred ``image/*`` MIME type, or ``None`` if nothing matched or
        the file could not be read.
    """
    if _is_image_mime_type(declared_mime_type):
        return declared_mime_type

    guessed_mime_type, _ = mimetypes.guess_type(path.name)
    if _is_image_mime_type(guessed_mime_type):
        return guessed_mime_type

    if data is None:
        try:
            # Only the header is needed for signature sniffing, so read 32
            # bytes; the file itself may be arbitrarily large.
            with path.open("rb") as stream:
                data = stream.read(32)
        except OSError:
            return None

    return _infer_image_mime_type_from_bytes(data)
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def _infer_image_mime_type_from_bytes(data: bytes) -> str | None:
|
|
433
|
+
if data.startswith(b"\x89PNG\r\n\x1a\n"):
|
|
434
|
+
return "image/png"
|
|
435
|
+
if data.startswith(b"\xff\xd8\xff"):
|
|
436
|
+
return "image/jpeg"
|
|
437
|
+
if data.startswith((b"GIF87a", b"GIF89a")):
|
|
438
|
+
return "image/gif"
|
|
439
|
+
if len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
|
|
440
|
+
return "image/webp"
|
|
441
|
+
if data.startswith(b"BM"):
|
|
442
|
+
return "image/bmp"
|
|
443
|
+
return None
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def _is_image_mime_type(mime_type: object) -> bool:
|
|
447
|
+
return isinstance(mime_type, str) and mime_type.startswith("image/")
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def _suffix_for_mime_type(mime_type: str) -> str:
    """Pick a file suffix for *mime_type*.

    ``IMAGE_SUFFIXES`` wins; otherwise the ``mimetypes`` module is asked; if
    neither yields a suffix, ``".img"`` is used.
    """
    registered = IMAGE_SUFFIXES.get(mime_type)
    if registered:
        return registered
    guessed = mimetypes.guess_extension(mime_type)
    return guessed if guessed else ".img"
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
async def _draw_with_openai(
    *,
    model: ImageModel,
    api_key: str,
    prompt: str,
    size: str,
    number: int,
    input_images: Sequence[_ResolvedInputImage],
) -> list[GeneratedImage]:
    """Generate or edit images through an OpenAI-compatible images API.

    With *input_images* the ``images.edit`` endpoint is called and the files
    are uploaded; without them ``images.generate`` is called.

    Args:
        model: Registry entry providing the model name, base URL and the
            fallback MIME type.
        api_key: Credential for the endpoint.
        prompt: Text prompt.
        size: Forwarded as the API ``size`` value; omitted when empty.
        number: Forwarded as the API ``n`` value.
        input_images: Reference images; non-empty selects the edit endpoint.

    Returns:
        One ``GeneratedImage`` per response item that carries ``b64_json``.

    Raises:
        RuntimeError: If the response contains no decodable image data.
    """
    request: dict[str, Any] = {
        "model": model.model_realname,
        "prompt": prompt,
        "n": number,
    }
    if size:
        request["size"] = size

    start_time = time.perf_counter()
    # The client owns an HTTP connection pool; the async context manager
    # closes it on every exit path.
    async with AsyncOpenAI(api_key=api_key, base_url=model.base_url) as client:
        if input_images:
            logger.debug(
                "Calling OpenAI-compatible image edit: model=%s input_images=%s",
                model.model_realname,
                len(input_images),
            )
            # ExitStack keeps every input file open for the duration of the
            # upload and closes them all afterwards, even on failure.
            with ExitStack() as stack:
                request["image"] = [
                    stack.enter_context(input_image.path.open("rb"))
                    for input_image in input_images
                ]
                result = await client.images.edit(**request)
        else:
            logger.debug(
                "Calling OpenAI-compatible image generation: model=%s number=%s",
                model.model_realname,
                number,
            )
            result = await client.images.generate(**request)

    # A response without a data list is treated like an empty one, so it
    # surfaces as the RuntimeError below and not as a TypeError while iterating.
    response_items = result.data or []

    images: list[GeneratedImage] = []
    for item in response_items:
        image_base64 = getattr(item, "b64_json", None)
        if not image_base64:
            logger.debug("Skipping OpenAI-compatible response item without b64_json")
            continue
        mime_type = getattr(item, "mime_type", None) or model.default_mime_type
        images.append(
            GeneratedImage(
                data=base64.b64decode(image_base64),
                mime_type=mime_type,
            )
        )

    if not images:
        logger.debug("OpenAI-compatible response did not include image data")
        raise RuntimeError(f"No image data returned for {model.model_displayname}")

    elapsed_seconds = time.perf_counter() - start_time
    logger.debug(
        "OpenAI-compatible image call completed: model=%s response_items=%s "
        "decoded_images=%s total_bytes=%s elapsed=%.2fs",
        model.model_realname,
        len(response_items),
        len(images),
        _total_image_bytes(images),
        elapsed_seconds,
    )
    return images
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
async def _draw_with_openrouter(
    *,
    model: ImageModel,
    api_key: str,
    prompt: str,
    size: str,
    number: int,
    input_images: Sequence[_ResolvedInputImage],
) -> list[GeneratedImage]:
    """Request images from OpenRouter's images endpoint over plain HTTP.

    Returns:
        The images decoded from the JSON response.

    Raises:
        RuntimeError: If the request fails before a response arrives, the
            response has an error status, the body is not valid JSON, or no
            image data can be extracted from it.
    """
    endpoint = _openrouter_images_url(model)
    body = _build_openrouter_image_request(
        model=model,
        prompt=prompt,
        size=size,
        number=number,
        input_images=input_images,
    )
    request_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    # A long default timeout for the request, with a shorter connect timeout.
    request_timeout = httpx.Timeout(
        OPENROUTER_REQUEST_TIMEOUT_SECONDS,
        connect=URL_DOWNLOAD_TIMEOUT_SECONDS,
    )

    started_at = time.perf_counter()
    logger.debug(
        "Calling OpenRouter image generation: model=%s number=%s input_images=%s",
        model.model_realname,
        number,
        len(input_images),
    )
    try:
        async with httpx.AsyncClient(timeout=request_timeout) as http_client:
            response = await http_client.post(
                endpoint, headers=request_headers, json=body
            )
    except httpx.RequestError as exc:
        logger.debug(
            "OpenRouter image request failed before response: model=%s",
            model.model_realname,
            exc_info=True,
        )
        raise RuntimeError(f"OpenRouter image request failed: {exc}") from exc

    if response.is_error:
        message = _extract_openrouter_error_message(response)
        logger.debug(
            "OpenRouter image request failed: model=%s status=%s message=%s",
            model.model_realname,
            response.status_code,
            message,
        )
        raise RuntimeError(
            "OpenRouter image request failed with HTTP "
            f"{response.status_code}: {message}"
        )

    try:
        decoded_body = response.json()
    except ValueError as exc:
        logger.debug("OpenRouter response was not valid JSON", exc_info=True)
        raise RuntimeError(
            f"Invalid JSON response returned for {model.model_displayname}"
        ) from exc

    images = _extract_openrouter_images(decoded_body, model)
    duration = time.perf_counter() - started_at
    logger.debug(
        "OpenRouter image call completed: model=%s decoded_images=%s "
        "total_bytes=%s elapsed=%.2fs",
        model.model_realname,
        len(images),
        _total_image_bytes(images),
        duration,
    )
    return images
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def _openrouter_images_url(model: ImageModel) -> str:
|
|
604
|
+
base_url = model.base_url or "https://openrouter.ai/api/v1"
|
|
605
|
+
return f"{base_url.rstrip('/')}/images"
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def _build_openrouter_image_request(
|
|
609
|
+
*,
|
|
610
|
+
model: ImageModel,
|
|
611
|
+
prompt: str,
|
|
612
|
+
size: str,
|
|
613
|
+
number: int,
|
|
614
|
+
input_images: Sequence[_ResolvedInputImage],
|
|
615
|
+
) -> dict[str, Any]:
|
|
616
|
+
request: dict[str, Any] = {
|
|
617
|
+
"model": model.model_realname,
|
|
618
|
+
"prompt": prompt,
|
|
619
|
+
"n": number,
|
|
620
|
+
}
|
|
621
|
+
if size:
|
|
622
|
+
request["size"] = size
|
|
623
|
+
if input_images:
|
|
624
|
+
request["input_references"] = [
|
|
625
|
+
_build_openrouter_input_reference(input_image)
|
|
626
|
+
for input_image in input_images
|
|
627
|
+
]
|
|
628
|
+
return request
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def _build_openrouter_input_reference(
    input_image: _ResolvedInputImage,
) -> dict[str, dict[str, str] | str]:
    """Wrap *input_image* as an ``image_url`` entry carrying a data URL."""
    data_url = _build_image_data_url(input_image)
    return {"type": "image_url", "image_url": {"url": data_url}}
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def _build_image_data_url(input_image: _ResolvedInputImage) -> str:
|
|
643
|
+
image_base64 = base64.b64encode(input_image.path.read_bytes()).decode("utf-8")
|
|
644
|
+
return f"data:{input_image.mime_type};base64,{image_base64}"
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
def _extract_openrouter_images(
    payload: object,
    model: ImageModel,
) -> list[GeneratedImage]:
    """Decode the images contained in an OpenRouter response payload.

    The payload must be an object whose ``data`` field is an array. Array
    items that are not objects, or that lack a non-empty string ``b64_json``,
    are skipped. ``media_type`` is used as the MIME type when it is a string;
    otherwise the model's default MIME type applies.

    Raises:
        RuntimeError: If the payload has the wrong shape or yields no images.
    """
    if not isinstance(payload, Mapping):
        logger.debug("OpenRouter response JSON was not an object")
        raise RuntimeError(f"No image data returned for {model.model_displayname}")

    entries = payload.get("data")
    # Strings and byte strings are sequences too, so exclude them explicitly.
    is_array = isinstance(entries, Sequence) and not isinstance(
        entries, (str, bytes, bytearray)
    )
    if not is_array:
        logger.debug("OpenRouter response did not include a data array")
        raise RuntimeError(f"No image data returned for {model.model_displayname}")

    decoded: list[GeneratedImage] = []
    for entry in entries:
        if not isinstance(entry, Mapping):
            logger.debug("Skipping OpenRouter response item that is not an object")
            continue

        encoded_image = entry.get("b64_json")
        if not (isinstance(encoded_image, str) and encoded_image):
            logger.debug("Skipping OpenRouter response item without b64_json")
            continue

        media_type = entry.get("media_type")
        if isinstance(media_type, str):
            mime_type = media_type
        else:
            mime_type = model.default_mime_type
        decoded.append(
            GeneratedImage(data=base64.b64decode(encoded_image), mime_type=mime_type)
        )

    if decoded:
        return decoded

    logger.debug("OpenRouter response did not include image data")
    raise RuntimeError(f"No image data returned for {model.model_displayname}")
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def _extract_openrouter_error_message(response: httpx.Response) -> str:
|
|
690
|
+
try:
|
|
691
|
+
payload = response.json()
|
|
692
|
+
except ValueError:
|
|
693
|
+
message = response.text.strip()
|
|
694
|
+
return message or response.reason_phrase
|
|
695
|
+
|
|
696
|
+
if not isinstance(payload, Mapping):
|
|
697
|
+
return response.reason_phrase
|
|
698
|
+
|
|
699
|
+
error = payload.get("error")
|
|
700
|
+
if isinstance(error, Mapping):
|
|
701
|
+
message = error.get("message")
|
|
702
|
+
if isinstance(message, str) and message:
|
|
703
|
+
return message
|
|
704
|
+
|
|
705
|
+
return response.reason_phrase
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
async def _draw_with_gemini_interactions(
    *,
    model: ImageModel,
    api_key: str,
    prompt: str,
    size: str,
    number: int,
    input_images: Sequence[_ResolvedInputImage],
) -> list[GeneratedImage]:
    """Generate *number* images via the Gemini Interactions API.

    Each image comes from its own synchronous single-image call, run in a
    worker thread; the calls are awaited together.
    """
    started_at = time.perf_counter()
    logger.debug(
        "Calling Gemini Interactions image generation: model=%s number=%s "
        "input_images=%s",
        model.model_realname,
        number,
        len(input_images),
    )

    # One worker-thread call per requested image.
    pending_calls = [
        asyncio.to_thread(
            _draw_one_with_gemini_interactions,
            model=model,
            api_key=api_key,
            prompt=prompt,
            size=size,
            input_images=input_images,
        )
        for _ in range(number)
    ]
    images = await asyncio.gather(*pending_calls)

    duration = time.perf_counter() - started_at
    logger.debug(
        "Gemini Interactions image calls completed: model=%s decoded_images=%s "
        "total_bytes=%s elapsed=%.2fs",
        model.model_realname,
        len(images),
        _total_image_bytes(images),
        duration,
    )
    return images
|
|
748
|
+
|
|
749
|
+
|
|
750
|
+
def _draw_one_with_gemini_interactions(
    *,
    model: ImageModel,
    api_key: str,
    prompt: str,
    size: str,
    input_images: Sequence[_ResolvedInputImage],
) -> GeneratedImage:
    """Run one blocking Gemini Interactions call and decode its output image.

    Raises:
        RuntimeError: If the interaction has no ``output_image`` or that
            image carries no data.
    """
    started_at = time.perf_counter()

    # Only pass HttpOptions when the model overrides the base URL.
    http_options = HttpOptions(base_url=model.base_url) if model.base_url else None
    client = genai.Client(api_key=api_key, http_options=http_options)

    call_kwargs: dict[str, Any] = {
        "model": model.model_realname,
        "input": _build_gemini_input(prompt, input_images),
    }
    response_format = _parse_gemini_size(size)
    if response_format is not None:
        call_kwargs["response_format"] = response_format

    interaction = client.interactions.create(**call_kwargs)

    output_image = getattr(interaction, "output_image", None)
    if output_image is None:
        logger.debug("Gemini response did not include output_image")
        raise RuntimeError(f"No image data returned for {model.model_displayname}")

    encoded_image = getattr(output_image, "data", None)
    if not encoded_image:
        logger.debug("Gemini output_image did not include data")
        raise RuntimeError(f"No image data returned for {model.model_displayname}")

    reported_mime_type = getattr(output_image, "mime_type", None)
    image = GeneratedImage(
        data=base64.b64decode(encoded_image),
        mime_type=reported_mime_type or model.default_mime_type,
    )

    duration = time.perf_counter() - started_at
    logger.debug(
        "Gemini single image call completed: model=%s mime_type=%s bytes=%s "
        "elapsed=%.2fs",
        model.model_realname,
        image.mime_type,
        len(image.data),
        duration,
    )
    return image
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
async def _draw_with_gemini_generate_content(
    *,
    model: ImageModel,
    api_key: str,
    prompt: str,
    size: str,
    number: int,
    input_images: Sequence[_ResolvedInputImage],
) -> list[GeneratedImage]:
    """Generate *number* images via the Gemini generate-content API.

    Each image comes from its own synchronous single-image call, run in a
    worker thread; the calls are awaited together.
    """
    started_at = time.perf_counter()
    logger.debug(
        "Calling Gemini Generate Content image generation: model=%s number=%s "
        "input_images=%s",
        model.model_realname,
        number,
        len(input_images),
    )

    # One worker-thread call per requested image.
    pending_calls = [
        asyncio.to_thread(
            _draw_one_with_gemini_generate_content,
            model=model,
            api_key=api_key,
            prompt=prompt,
            size=size,
            input_images=input_images,
        )
        for _ in range(number)
    ]
    images = await asyncio.gather(*pending_calls)

    duration = time.perf_counter() - started_at
    logger.debug(
        "Gemini Generate Content image calls completed: model=%s decoded_images=%s "
        "total_bytes=%s elapsed=%.2fs",
        model.model_realname,
        len(images),
        _total_image_bytes(images),
        duration,
    )
    return images
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
def _draw_one_with_gemini_generate_content(
    *,
    model: ImageModel,
    api_key: str,
    prompt: str,
    size: str,
    input_images: Sequence[_ResolvedInputImage],
) -> GeneratedImage:
    """Request one image from the Gemini Generate Content API (blocking).

    A new ``genai.Client`` is created for the call; ``model.base_url`` is
    passed as ``HttpOptions`` only when it is set.

    Args:
        model: Registered model entry; ``model_realname`` is sent upstream.
        api_key: API key for the client.
        prompt: Text prompt.
        size: Size string, translated by ``_build_gemini_generate_content_config``.
        input_images: Reference images sent along with the prompt.

    Returns:
        The image extracted from the response.

    Raises:
        RuntimeError: Propagated from ``_extract_gemini_generate_content_image``
            when the response carries no image data.
    """
    started_at = time.perf_counter()

    http_options = HttpOptions(base_url=model.base_url) if model.base_url else None
    client = genai.Client(api_key=api_key, http_options=http_options)

    response = client.models.generate_content(
        model=model.model_realname,
        contents=_build_gemini_generate_content_contents(prompt, input_images),
        config=_build_gemini_generate_content_config(size),
    )
    generated = _extract_gemini_generate_content_image(response, model)

    duration = time.perf_counter() - started_at
    logger.debug(
        "Gemini Generate Content single image call completed: model=%s "
        "mime_type=%s bytes=%s elapsed=%.2fs",
        model.model_realname,
        generated.mime_type,
        len(generated.data),
        duration,
    )
    return generated
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def _build_gemini_input(
    prompt: str,
    input_images: Sequence[_ResolvedInputImage],
) -> str | list[dict[str, str]]:
    """Build the ``input`` payload for a Gemini request.

    Args:
        prompt: Text prompt.
        input_images: Reference images; each one's file is read from
            ``path`` and base64-encoded.

    Returns:
        The bare prompt string when there are no reference images; otherwise
        a list with one ``"image"`` part per reference image followed by a
        single ``"text"`` part holding the prompt.
    """
    if not input_images:
        return prompt

    logger.debug("Building Gemini multimodal input: input_images=%s", len(input_images))
    image_parts = [
        {
            "type": "image",
            "data": base64.b64encode(reference.path.read_bytes()).decode("utf-8"),
            "mime_type": reference.mime_type,
        }
        for reference in input_images
    ]
    # The text part goes last, after all image parts.
    return [*image_parts, {"type": "text", "text": prompt}]
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
def _build_gemini_generate_content_contents(
    prompt: str,
    input_images: Sequence[_ResolvedInputImage],
) -> list[types.Part]:
    """Build the ``contents`` parts for a Generate Content request.

    Args:
        prompt: Text prompt; becomes the first part.
        input_images: Reference images; each file is read from ``path`` and
            appended as a bytes part with its MIME type.

    Returns:
        A list starting with the text part, followed by one part per image.
    """
    contents = [types.Part.from_text(text=prompt)]
    contents.extend(
        types.Part.from_bytes(
            data=reference.path.read_bytes(),
            mime_type=reference.mime_type,
        )
        for reference in input_images
    )
    return contents
|
|
907
|
+
|
|
908
|
+
|
|
909
|
+
def _build_gemini_generate_content_config(
    size: str,
) -> types.GenerateContentConfig:
    """Build the Generate Content config for an image request.

    Args:
        size: Public size string, parsed by ``_parse_gemini_size``.

    Returns:
        A config requesting ``TEXT`` and ``IMAGE`` modalities. ``image_config``
        is ``None`` when the size string parses to ``None``; otherwise it
        carries whichever of ``aspect_ratio`` / ``image_size`` were parsed.
    """
    parsed_size = _parse_gemini_size(size)
    if parsed_size is None:
        image_config = None
    else:
        image_config = types.ImageConfig(
            aspect_ratio=parsed_size.get("aspect_ratio"),
            image_size=parsed_size.get("image_size"),
        )

    return types.GenerateContentConfig(
        response_modalities=["TEXT", "IMAGE"],
        image_config=image_config,
    )
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
def _extract_gemini_generate_content_image(
    response: types.GenerateContentResponse,
    model: ImageModel,
) -> GeneratedImage:
    """Return the first image found in a Generate Content response.

    Parts without ``inline_data`` are skipped silently; parts whose inline
    data is empty are skipped with a debug log.

    Args:
        response: Response whose ``parts`` are scanned in order.
        model: Supplies ``default_mime_type`` when the part has no MIME type,
            and ``model_displayname`` for the error message.

    Returns:
        The first part carrying image data, as a ``GeneratedImage``.

    Raises:
        RuntimeError: If no part carries image data.
    """
    for candidate in response.parts or ():
        blob = getattr(candidate, "inline_data", None)
        if blob is None:
            continue

        payload = getattr(blob, "data", None)
        if payload:
            mime_type = getattr(blob, "mime_type", None) or model.default_mime_type
            # String payloads are treated as base64 text; anything else is
            # passed through unchanged.
            raw_bytes = base64.b64decode(payload) if isinstance(payload, str) else payload
            return GeneratedImage(data=raw_bytes, mime_type=mime_type)

        logger.debug("Skipping Gemini Generate Content part without image data")

    logger.debug("Gemini Generate Content response did not include image data")
    raise RuntimeError(f"No image data returned for {model.model_displayname}")
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
def _parse_gemini_size(size: str) -> dict[str, str] | None:
    """Translate the public ``size`` string into Gemini image-format fields.

    Gemini does not use the same size string contract as OpenAI image APIs,
    so the single ``size`` argument of ``draw`` is mapped here:

    - ``""`` or ``"auto"`` (any letter case) -> ``None``
    - ``"<aspect>@<size>"`` -> ``aspect_ratio`` and ``image_size``; an empty
      side is omitted
    - a value containing ``":"`` -> ``aspect_ratio``
    - anything else -> ``image_size``

    Args:
        size: Size string; surrounding whitespace is ignored.

    Returns:
        ``None`` when no explicit format is requested, otherwise a dict that
        always contains ``"type": "image"`` plus the parsed fields.
    """
    normalized_size = size.strip()
    # Match the "auto" sentinel case-insensitively; otherwise "Auto"/"AUTO"
    # would fall through and be sent upstream as a literal image_size.
    if not normalized_size or normalized_size.casefold() == "auto":
        return None

    response_format = {"type": "image"}

    aspect_ratio, separator, image_size = normalized_size.partition("@")
    if separator:
        aspect_ratio = aspect_ratio.strip()
        image_size = image_size.strip()
        if aspect_ratio:
            response_format["aspect_ratio"] = aspect_ratio
        if image_size:
            response_format["image_size"] = image_size
        return response_format

    if ":" in normalized_size:
        response_format["aspect_ratio"] = normalized_size
    else:
        response_format["image_size"] = normalized_size
    return response_format
|
|
975
|
+
|
|
976
|
+
|
|
977
|
+
def _hash_url(url: str) -> str:
    """Return the first 12 hex characters of the SHA-256 digest of ``url``."""
    digest = hashlib.sha256()
    digest.update(url.encode())
    return digest.hexdigest()[:12]
|
|
979
|
+
|
|
980
|
+
|
|
981
|
+
def _file_size_or_none(path: Path) -> int | None:
    """Return the size of ``path`` in bytes, or ``None`` if it cannot be stat'ed.

    An ``OSError`` from ``path.stat()`` is logged at debug level (file name
    only, with traceback) and turned into ``None``.
    """
    try:
        stat_result = path.stat()
    except OSError:
        logger.debug("Could not stat file: filename=%s", path.name, exc_info=True)
        return None
    return stat_result.st_size
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
def _total_image_bytes(images: Sequence[GeneratedImage]) -> int:
    """Return the combined length of the ``data`` payloads of ``images``."""
    total = 0
    for generated in images:
        total += len(generated.data)
    return total
|
tena/main.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
#
|
|
3
|
+
# main.py CLI 入口
|
|
4
|
+
#
|
|
5
|
+
#
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import argparse
|
|
9
|
+
import asyncio
|
|
10
|
+
import logging
|
|
11
|
+
import sys
|
|
12
|
+
from collections.abc import Sequence
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from tena.image import IMAGE_MODELS, GeneratedImage, draw
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)

# Level names accepted on the command line, mapped to the matching
# ``logging`` module constants.
LOG_LEVELS = {
    level_name: getattr(logging, level_name)
    for level_name in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _build_cli_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``tena`` command.

    Required options are ``--model-path`` (one of the registered
    ``IMAGE_MODELS`` keys), ``--prompt`` and ``--output``. Optional ones are
    ``--size`` (default ``"auto"``), ``--number`` (default ``1``), repeatable
    ``--input-image`` (collected into ``input_images``) and ``--log-level``
    (converted by ``_parse_log_level``, default ``INFO``).
    """
    cli = argparse.ArgumentParser(
        prog="tena",
        description="Generate images with a registered text-to-image model.",
    )
    add_option = cli.add_argument

    add_option(
        "--model-path",
        required=True,
        choices=sorted(IMAGE_MODELS),
        help="Registered image model path.",
    )
    add_option(
        "--prompt",
        required=True,
        help="Text prompt. Use '-' to read the prompt from stdin.",
    )
    add_option(
        "--output",
        required=True,
        type=Path,
        help="Required output image path.",
    )
    add_option(
        "--size",
        default="auto",
        help="Model-specific image size. Defaults to 'auto'.",
    )
    add_option(
        "--number",
        default=1,
        type=int,
        help="Number of images to generate. Defaults to 1.",
    )
    # Each occurrence appends one value; the attribute stays None when the
    # option is never given.
    add_option(
        "--input-image",
        dest="input_images",
        action="append",
        default=None,
        help="Reference image path or URL. Repeat to provide multiple images.",
    )
    add_option(
        "--log-level",
        metavar="LEVEL",
        type=_parse_log_level,
        default="INFO",
        help=(
            "Log level for stderr output: DEBUG, INFO, WARNING, ERROR, or "
            "CRITICAL. Defaults to INFO."
        ),
    )
    return cli
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _parse_log_level(value: str) -> int:
    """Convert a level name (any letter case) to its ``logging`` constant.

    Intended for use as an ``argparse`` ``type=`` callable.

    Raises:
        argparse.ArgumentTypeError: If the upper-cased name is not a key of
            ``LOG_LEVELS``.
    """
    level_name = value.upper()
    if level_name in LOG_LEVELS:
        return LOG_LEVELS[level_name]

    accepted = ", ".join(LOG_LEVELS)
    raise argparse.ArgumentTypeError(
        f"invalid log level: {value}. Expected one of: {accepted}"
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _configure_logging(log_level: int) -> None:
    """Call ``logging.basicConfig`` with stderr output at ``log_level``.

    Records are formatted as ``LEVEL logger-name: message``.
    """
    options = {
        "stream": sys.stderr,
        "format": "%(levelname)s %(name)s: %(message)s",
        "level": log_level,
    }
    logging.basicConfig(**options)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _read_cli_prompt(prompt: str) -> str:
    """Return the prompt text, reading all of stdin when ``prompt`` is ``"-"``."""
    return sys.stdin.read() if prompt == "-" else prompt
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _numbered_output_path(output_path: Path, index: int) -> Path:
    """Return the output path for the image at zero-based ``index``.

    Index 0 keeps ``output_path`` unchanged; later images get ``-<index + 1>``
    inserted between the stem and the suffix (``result.png`` ->
    ``result-2.png`` for index 1).
    """
    if index != 0:
        numbered_name = f"{output_path.stem}-{index + 1}{output_path.suffix}"
        return output_path.with_name(numbered_name)
    return output_path
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _write_images(images: Sequence[GeneratedImage], output_path: Path) -> list[Path]:
    """Write every generated image to disk and return the paths written.

    ``output_path`` has ``~`` expanded and its parent directory is created if
    needed. The target of each image comes from ``_numbered_output_path``.

    Args:
        images: Images whose ``data`` bytes are written.
        output_path: Requested output path for the first image.

    Returns:
        The written paths, in the same order as ``images``.
    """
    base_path = output_path.expanduser()
    base_path.parent.mkdir(parents=True, exist_ok=True)

    written_paths: list[Path] = []
    for position, generated in enumerate(images):
        target = _numbered_output_path(base_path, position)
        target.write_bytes(generated.data)
        written_paths.append(target)
        logger.debug(
            "Wrote generated image: path=%s mime_type=%s bytes=%s",
            target,
            generated.mime_type,
            len(generated.data),
        )
    return written_paths
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run the ``tena`` CLI and return a process exit code.

    Parses ``argv``, configures logging, runs ``draw`` via ``asyncio.run``
    and writes the resulting images. Each written path is printed to stdout.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` is passed
            through unchanged.

    Returns:
        ``0`` on success; ``1`` if reading the prompt, generating or writing
        raised an ``Exception`` (the message is printed to stderr).
    """
    args = _build_cli_parser().parse_args(argv)
    _configure_logging(args.log_level)

    try:
        logger.debug(
            "Starting tena CLI: model_path=%s size=%s number=%s input_images=%s",
            args.model_path,
            args.size,
            args.number,
            len(args.input_images or ()),
        )
        prompt_text = _read_cli_prompt(args.prompt)
        generated_images = asyncio.run(
            draw(
                model_path=args.model_path,
                prompt=prompt_text,
                size=args.size,
                number=args.number,
                input_images=args.input_images,
            )
        )
        written_paths = _write_images(generated_images, args.output)
    except Exception as exc:
        # Top-level boundary: keep the traceback for DEBUG logs only and show
        # the user a one-line error.
        logger.debug("tena CLI failed", exc_info=True)
        print(f"tena: error: {exc}", file=sys.stderr)
        return 1
    else:
        for written_path in written_paths:
            print(written_path)
        return 0
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Script entry point: exit the process with the code returned by main().
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tena
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: My Text-to-Image kits
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: google-genai>=2.10.0
|
|
7
|
+
Requires-Dist: httpx>=0.28.1
|
|
8
|
+
Requires-Dist: openai>=2.44.0
|
|
9
|
+
Requires-Dist: pytest>=9.0.2
|
|
10
|
+
Requires-Dist: python-dotenv>=1.2.1
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# tena
|
|
14
|
+
|
|
15
|
+
`tena` provides a small async wrapper for text-to-image model calls.
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
Prerequisites:
|
|
20
|
+
|
|
21
|
+
- Python 3.10+
|
|
22
|
+
- uv
|
|
23
|
+
|
|
24
|
+
Dependencies are declared in `pyproject.toml`:
|
|
25
|
+
|
|
26
|
+
- `openai`
|
|
27
|
+
- `google-genai`
|
|
28
|
+
- `httpx`
|
|
29
|
+
- `pytest`
|
|
30
|
+
- `python-dotenv`
|
|
31
|
+
|
|
32
|
+
## Image generation
|
|
33
|
+
|
|
34
|
+
### Python API
|
|
35
|
+
|
|
36
|
+
Use `draw` with a registered `model_path`. The function returns a list of
|
|
37
|
+
`GeneratedImage` objects. Each object contains image bytes and a MIME type.
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
import asyncio
|
|
41
|
+
from pathlib import Path
|
|
42
|
+
|
|
43
|
+
from tena import draw, suffix_for_mime_type
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
async def main() -> None:
|
|
47
|
+
images = await draw(
|
|
48
|
+
model_path="openrouter/gpt-image-2",
|
|
49
|
+
prompt="A clean product photo of a ceramic cup",
|
|
50
|
+
size="1024x1024",
|
|
51
|
+
number=1,
|
|
52
|
+
input_images=[
|
|
53
|
+
"./reference-1.png",
|
|
54
|
+
"https://example.com/reference-2.png",
|
|
55
|
+
],
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
image = images[0]
|
|
59
|
+
suffix = suffix_for_mime_type(image.mime_type)
|
|
60
|
+
Path(f"result{suffix}").write_bytes(image.data)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
asyncio.run(main())
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
The package exports the reusable API from `tena/__init__.py`. `tena/main.py` is
|
|
67
|
+
only the CLI entrypoint; core image generation logic lives in `tena/image.py`.
|
|
68
|
+
|
|
69
|
+
### CLI
|
|
70
|
+
|
|
71
|
+
The `tena` command is a thin wrapper around the Python API.
|
|
72
|
+
|
|
73
|
+
Generate one image and write it to a required output path:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
uv run tena --model-path openrouter/gpt-image-2 --prompt "A clean product photo of a ceramic cup" --output ./result.png --size 1024x1024
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
The output path supports `~`, including `--output ~/Downloads/result.png` and
|
|
80
|
+
`--output=~/Downloads/result.png`.
|
|
81
|
+
|
|
82
|
+
Use `-` as the prompt to read from stdin:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
echo "A simple red cube on a clean white background" | uv run tena --model-path openrouter/gpt-image-2 --prompt - --output ./result.png --size 1024x1024
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Generate multiple images with `--number`. The first image uses the exact output
|
|
89
|
+
path, and later images are numbered:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
uv run tena --model-path zenmux/gpt-image-2 --prompt "A blue sphere" --output ./result.png --number 2 --size 1024x1024
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
This writes `result.png` and `result-2.png`.
|
|
96
|
+
|
|
97
|
+
Provide reference images with repeated `--input-image` arguments. Each value can
|
|
98
|
+
be a local file path or an `http`/`https` URL:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
uv run tena \
|
|
102
|
+
--model-path openrouter/gpt-image-2 \
|
|
103
|
+
--prompt "Create a studio product photo using the reference objects" \
|
|
104
|
+
--input-image ./reference-1.png \
|
|
105
|
+
--input-image https://example.com/reference-2.png \
|
|
106
|
+
--output ./result.png \
|
|
107
|
+
--size 1024x1024
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
URL images are downloaded to `/tmp/tena/input_images` and cached by URL, so the
|
|
111
|
+
same URL is reused on later runs instead of being downloaded again.
|
|
112
|
+
|
|
113
|
+
CLI logs are written to stderr and default to `INFO`. The generated output paths
|
|
114
|
+
are still written to stdout. Use `--log-level WARNING` to hide normal progress
|
|
115
|
+
logs, or `--log-level DEBUG` for diagnostic details:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
uv run tena --model-path openrouter/gpt-image-2 --prompt "A blue sphere" --output ./result.png --log-level WARNING
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
INFO logs include concise request metadata, URL cache/download status, byte
|
|
122
|
+
counts, and elapsed times. DEBUG logs include more detailed SDK and input image
|
|
123
|
+
diagnostics. Logs do not include API keys, full prompts, full input URLs, or
|
|
124
|
+
image data.
|
|
125
|
+
|
|
126
|
+
### Return object
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
@dataclass(frozen=True)
|
|
130
|
+
class GeneratedImage:
|
|
131
|
+
data: bytes
|
|
132
|
+
mime_type: str
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Registered models
|
|
136
|
+
|
|
137
|
+
Models are currently registered in code. `model_path` is the lookup key, and
|
|
138
|
+
`model_realname` is the model id sent to the upstream API.
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
@dataclass(frozen=True)
|
|
142
|
+
class ImageModel:
|
|
143
|
+
model_displayname: str
|
|
144
|
+
model_realname: str
|
|
145
|
+
client: Literal[
|
|
146
|
+
"openai",
|
|
147
|
+
"openrouter",
|
|
148
|
+
"gemini-interactions",
|
|
149
|
+
"gemini-generate-content",
|
|
150
|
+
]
|
|
151
|
+
api_key_env: str
|
|
152
|
+
base_url: str | None = None
|
|
153
|
+
default_mime_type: str = "image/png"
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Current model paths:
|
|
157
|
+
|
|
158
|
+
- `302ai/gpt-image-2`
|
|
159
|
+
- `302ai/gemini-3.1-flash-image-preview`
|
|
160
|
+
- `google/gemini-3.1-flash-image`
|
|
161
|
+
- `openrouter/gpt-image-2`
|
|
162
|
+
- `zenmux/gpt-image-2`
|
|
163
|
+
- `zenmux/gemini-3.1-flash-image`
|
|
164
|
+
|
|
165
|
+
`client` means which upstream API contract is used. OpenAI-compatible gateways
|
|
166
|
+
can use `client="openai"` with a custom `base_url`. OpenRouter uses
|
|
167
|
+
`client="openrouter"` and calls its `/images` unified image generation endpoint
|
|
168
|
+
directly.
|
|
169
|
+
|
|
170
|
+
Gemini image models use two different API contracts:
|
|
171
|
+
|
|
172
|
+
- `gemini-interactions` uses Google's Interactions API and is used for direct
|
|
173
|
+
Google Gemini access.
|
|
174
|
+
- `gemini-generate-content` uses the Generate Content API and is used for
|
|
175
|
+
Gemini-compatible gateways such as Zenmux and 302AI.
|
|
176
|
+
|
|
177
|
+
### Environment variables
|
|
178
|
+
|
|
179
|
+
Set the API key required by the selected model entry. Values can be provided by
|
|
180
|
+
the process environment or by a project-root `.env` file:
|
|
181
|
+
|
|
182
|
+
- `OPENROUTER_API_KEY`
|
|
183
|
+
- `ZENMUX_API_KEY`
|
|
184
|
+
- `AI302_API_KEY`
|
|
185
|
+
- `GEMINI_API_KEY`
|
|
186
|
+
|
|
187
|
+
### Gemini size format
|
|
188
|
+
|
|
189
|
+
Gemini image models use a single `size` string that is parsed into
|
|
190
|
+
the Gemini image response configuration. Interactions requests use
|
|
191
|
+
`response_format`; Generate Content requests use `image_config`.
|
|
192
|
+
|
|
193
|
+
- `16:9` -> `aspect_ratio`
|
|
194
|
+
- `2K` or `4K` -> `image_size`
|
|
195
|
+
- `16:9@2K` -> both `aspect_ratio` and `image_size`
|
|
196
|
+
- `auto` -> no explicit Gemini `response_format` or `image_config`
|
|
197
|
+
|
|
198
|
+
OpenAI-compatible and OpenRouter clients pass `size` through as the API `size`
|
|
199
|
+
parameter.
|
|
200
|
+
|
|
201
|
+
## Not implemented yet
|
|
202
|
+
|
|
203
|
+
The following parameter is part of the public function signature but is not
|
|
204
|
+
implemented yet:
|
|
205
|
+
|
|
206
|
+
- `web_search`
|
|
207
|
+
|
|
208
|
+
Passing `web_search` raises `NotImplementedError`.
|
|
209
|
+
|
|
210
|
+
## Integration tests
|
|
211
|
+
|
|
212
|
+
Live image generation tests are grouped under `tests/integration`. Set the API
|
|
213
|
+
key for the model you want to test in the environment or in the project `.env`
|
|
214
|
+
file:
|
|
215
|
+
|
|
216
|
+
- `OPENROUTER_API_KEY` for `openrouter/gpt-image-2`
|
|
217
|
+
- `ZENMUX_API_KEY` for `zenmux/gpt-image-2`
|
|
218
|
+
|
|
219
|
+
Then run:
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
pytest tests/integration
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Tests with missing API keys are skipped. When a test succeeds, it writes the
|
|
226
|
+
generated image to `~/Downloads` with a `tena-<provider>-gpt-image-2` filename
|
|
227
|
+
prefix.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
tena/__init__.py,sha256=Rym9hGQdH7Ekw7trrB4JOrPfaEof0VlJRvqAHQD-Mho,154
|
|
2
|
+
tena/image.py,sha256=99smWHfb7mq4BoKZkm3p_LHjOKFBPOqJFJf2EVMDD-Q,31026
|
|
3
|
+
tena/main.py,sha256=pFY4cthIsZo8SGSrPaNFpRR2IDK8-iJiHBh5e2AvYe4,4425
|
|
4
|
+
tena-0.1.0.dist-info/METADATA,sha256=Gts41nVYtGSPzU1_ZnB_NUsnkV9tkjR5JVEqPEKo4NQ,6245
|
|
5
|
+
tena-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
6
|
+
tena-0.1.0.dist-info/entry_points.txt,sha256=HphO135bSij_v6F6hX5wkMI-u8I2gufC_VApIZP4ZSc,40
|
|
7
|
+
tena-0.1.0.dist-info/RECORD,,
|