token0 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
token0/__init__.py ADDED
File without changes
token0/api/__init__.py ADDED
File without changes
token0/api/auth.py ADDED
@@ -0,0 +1,32 @@
1
+ """API key authentication middleware."""
2
+
3
+ import hashlib
4
+
5
+ from fastapi import HTTPException, Security
6
+ from fastapi.security import APIKeyHeader
7
+
8
+ from token0.config import settings
9
+
10
# Security scheme that reads the Token0 key from the ``X-Token0-Key`` request
# header. ``auto_error=False`` is set so that a missing header reaches
# ``verify_api_key`` as ``None`` instead of being rejected by the scheme itself.
api_key_header = APIKeyHeader(name="X-Token0-Key", auto_error=False)
11
+
12
+
13
def hash_api_key(key: str) -> str:
    """Return the hex-encoded SHA-256 digest of ``key``.

    The key is encoded with the default ``str.encode()`` codec (UTF-8)
    before hashing.
    """
    hasher = hashlib.sha256()
    hasher.update(key.encode())
    return hasher.hexdigest()
15
+
16
+
17
async def verify_api_key(api_key: str | None = Security(api_key_header)) -> str | None:
    """Verify API key if provided. For open-source mode, auth is optional.

    Args:
        api_key: Value of the ``X-Token0-Key`` header, or ``None`` when the
            header was not sent.

    Returns:
        The SHA-256 hex digest of the key if it matches the configured
        master key, ``None`` if no key was provided.

    Raises:
        HTTPException: 401 when a key was provided but does not match.

    In cloud mode, this would be required.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    if api_key is None:
        # Open-source mode — no auth required
        return None

    # For now, validate against master key.
    # compare_digest runs in time independent of where the inputs differ, so
    # the comparison does not leak how much of the secret was guessed.
    # Comparing bytes (not str) also accepts non-ASCII keys without raising.
    supplied = api_key.encode()
    expected = settings.token0_master_key.encode()
    if hmac.compare_digest(supplied, expected):
        return hash_api_key(api_key)

    # TODO: Look up in database for multi-tenant cloud mode
    raise HTTPException(status_code=401, detail="Invalid API key")
File without changes
token0/api/v1/chat.py ADDED
@@ -0,0 +1,306 @@
1
+ """Main /v1/chat/completions endpoint — the core proxy."""
2
+
3
+ import time
4
+ import uuid
5
+
6
+ from fastapi import APIRouter, Header, HTTPException
7
+
8
+ from token0.config import settings
9
+ from token0.models.db import Request
10
+ from token0.models.request import (
11
+ ChatRequest,
12
+ ChatResponse,
13
+ Choice,
14
+ Message,
15
+ Token0Usage,
16
+ UsageInfo,
17
+ )
18
+ from token0.optimization.analyzer import analyze_image
19
+ from token0.optimization.cache import get_cached_response, make_cache_key, set_cached_response
20
+ from token0.optimization.prompt_classifier import classify_prompt_detail, extract_prompt_text
21
+ from token0.optimization.router import OptimizationPlan, get_provider_from_model, plan_optimization
22
+ from token0.optimization.transformer import transform_image
23
+ from token0.providers.anthropic import AnthropicProvider
24
+ from token0.providers.base import BaseProvider, get_cost_per_token
25
+ from token0.providers.google import GoogleProvider
26
+ from token0.providers.openai import OpenAIProvider
27
+ from token0.storage.postgres import async_session
28
+
29
# Router that carries the chat completion endpoint defined in this module.
router = APIRouter()
30
+
31
+
32
def _get_provider(provider_name: str, api_key: str | None = None) -> BaseProvider:
    """Build the provider client matching ``provider_name``.

    The per-request ``api_key`` wins; otherwise the provider's default key
    from ``settings`` is used.

    Raises:
        HTTPException: 400 if the provider is unknown or no key is available.
    """
    # provider name -> (label used in the error message, client class, settings field)
    registry = {
        "openai": ("OpenAI", OpenAIProvider, "openai_api_key"),
        "anthropic": ("Anthropic", AnthropicProvider, "anthropic_api_key"),
        "google": ("Google", GoogleProvider, "google_api_key"),
    }

    entry = registry.get(provider_name)
    if entry is None:
        raise HTTPException(400, f"Unsupported provider: {provider_name}")

    label, provider_cls, settings_field = entry
    resolved_key = api_key or getattr(settings, settings_field)
    if not resolved_key:
        raise HTTPException(400, f"{label} API key required. Pass via X-Provider-Key header.")
    return provider_cls(api_key=resolved_key)
51
+
52
+
53
@router.post("/chat/completions", response_model=ChatResponse)
async def chat_completions(
    request: ChatRequest,
    authorization: str | None = Header(None),
    x_provider_key: str | None = Header(None),
    x_token0_key: str | None = Header(None),
):
    """Proxy a chat completion, optimizing image parts before forwarding.

    Steps: classify the prompt, analyze and transform each image part (only
    when ``request.token0_optimize`` is set), consult the response cache,
    forward the rewritten messages to the upstream provider, cache the
    answer, record a ``Request`` row, and return the completion together
    with a ``token0`` savings summary.

    Args:
        request: Parsed request body.
        authorization: ``Authorization`` header; accepted but not read here.
        x_provider_key: Upstream provider API key for this request. When
            absent, ``_get_provider`` falls back to the key in ``settings``.
        x_token0_key: ``X-Token0-Key`` header; accepted but not read here.

    Returns:
        A ``ChatResponse`` built from the provider response, or from the
        cached entry on a cache hit.

    Raises:
        HTTPException: 400 from ``_get_provider`` when the provider is
            unsupported or no provider API key is available.
    """
    # perf_counter is monotonic, so the measured latency cannot go negative
    # or jump when the system clock is adjusted.
    start_time = time.perf_counter()

    # --- Step 1: Classify the prompt ---
    prompt_text = extract_prompt_text(
        [{"role": m.role, "content": m.content} for m in request.messages]
    )
    prompt_detail = classify_prompt_detail(prompt_text) if request.token0_optimize else "auto"

    # --- Step 2: Determine provider and model ---
    actual_model = request.model
    model_cascaded_to = None
    provider_name = get_provider_from_model(request.model)

    # --- Step 3: Process messages — find and optimize images ---
    optimized_messages = []
    total_tokens_before = 0
    total_tokens_after = 0
    optimizations_applied = []
    plans: list[OptimizationPlan] = []
    cache_key = None
    first_pil_image = None  # for cache key generation

    for msg in request.messages:
        if isinstance(msg.content, str):
            optimized_messages.append({"role": msg.role, "content": msg.content})
            continue

        optimized_parts = []
        for part in msg.content:
            if part.type == "text":
                optimized_parts.append({"type": "text", "text": part.text})
            elif part.type == "image_url" and part.image_url and request.token0_optimize:
                image_data = part.image_url.url
                analysis, raw_bytes, pil_image = analyze_image(image_data)

                # Save first image for cache key
                if first_pil_image is None:
                    first_pil_image = pil_image

                plan = plan_optimization(
                    analysis,
                    request.model,
                    detail_override=request.token0_detail_override,
                    prompt_detail=prompt_detail,
                    enable_cascade=request.token0_enable_cascade,
                )
                plans.append(plan)

                # Check for model cascade recommendation (first one wins)
                if plan.recommended_model and model_cascaded_to is None:
                    model_cascaded_to = plan.recommended_model
                    actual_model = plan.recommended_model

                total_tokens_before += plan.estimated_tokens_before
                total_tokens_after += plan.estimated_tokens_after
                optimizations_applied.extend(plan.reasons)

                if plan.use_ocr_route:
                    # The image is replaced by its extracted text.
                    result = transform_image(plan, analysis, raw_bytes, pil_image)
                    optimized_parts.append(
                        {
                            "type": "text",
                            "text": f"[Extracted text from image]:\n{result['content']}",
                        }
                    )
                elif any([plan.resize, plan.recompress_jpeg, plan.force_detail_low]):
                    # The image is re-encoded and sent inline as a data URL.
                    result = transform_image(plan, analysis, raw_bytes, pil_image)
                    detail = "low" if plan.force_detail_low else (part.image_url.detail or "auto")
                    optimized_parts.append(
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{result['media_type']};base64,{result['base64']}",
                                "detail": detail,
                            },
                        }
                    )
                else:
                    # Nothing to change: pass the image through untouched.
                    optimized_parts.append(
                        {
                            "type": "image_url",
                            "image_url": {"url": image_data, "detail": part.image_url.detail},
                        }
                    )
            elif part.type == "image_url" and part.image_url:
                # Optimization disabled for this request: pass through untouched.
                optimized_parts.append(
                    {
                        "type": "image_url",
                        "image_url": {"url": part.image_url.url, "detail": part.image_url.detail},
                    }
                )

        optimized_messages.append({"role": msg.role, "content": optimized_parts})

    # --- Step 4: Check semantic cache ---
    # The cache key is derived from the first image only. With several images
    # two different requests sharing a first image and prompt would collide and
    # the second would receive the wrong cached answer, so caching is limited
    # to single-image requests.
    if (
        request.token0_enable_cache
        and first_pil_image is not None
        and len(plans) == 1
        and prompt_text
    ):
        cache_key = make_cache_key(first_pil_image, prompt_text, actual_model)
        cached = await get_cached_response(cache_key)
        if cached:
            latency_ms = int((time.perf_counter() - start_time) * 1000)

            # Log cache hit — the whole estimated prompt cost counts as saved
            cost_per_input_token = get_cost_per_token(request.model, "input")
            optimizations_applied.append("cache hit — 0 tokens")

            async with async_session() as session:
                db_request = Request(
                    provider=provider_name,
                    model=actual_model,
                    customer_id="00000000-0000-0000-0000-000000000000",
                    image_count=len(plans),
                    optimization_type="cache_hit",
                    tokens_original_estimate=total_tokens_before,
                    tokens_actual=0,
                    tokens_saved=total_tokens_before,
                    cost_original_estimate=total_tokens_before * cost_per_input_token,
                    cost_actual=0,
                    cost_saved=total_tokens_before * cost_per_input_token,
                    response_tokens=0,
                    latency_ms=latency_ms,
                    optimization_details={"cache_hit": True, "cache_key": cache_key},
                )
                session.add(db_request)
                await session.commit()

            return ChatResponse(
                id=f"token0-{uuid.uuid4().hex[:12]}",
                model=cached["model"],
                choices=[
                    Choice(
                        index=0,
                        message=Message(role="assistant", content=cached["content"]),
                        finish_reason=cached.get("finish_reason", "stop"),
                    )
                ],
                usage=UsageInfo(
                    prompt_tokens=0,
                    completion_tokens=cached.get("completion_tokens", 0),
                    total_tokens=cached.get("completion_tokens", 0),
                ),
                token0=Token0Usage(
                    original_prompt_tokens_estimate=total_tokens_before,
                    optimized_prompt_tokens=0,
                    tokens_saved=total_tokens_before,
                    cost_saved_usd=round(total_tokens_before * cost_per_input_token, 6),
                    optimizations_applied=optimizations_applied,
                    cache_hit=True,
                    model_cascaded_to=model_cascaded_to,
                ),
            )

    # --- Step 5: Resolve provider for actual model (may have been cascaded) ---
    actual_provider_name = get_provider_from_model(actual_model)
    provider = _get_provider(actual_provider_name, api_key=x_provider_key)

    # --- Step 6: Forward to provider ---
    provider_response = await provider.chat_completion(
        model=actual_model,
        messages=optimized_messages,
        max_tokens=request.max_tokens,
        temperature=request.temperature,
    )

    latency_ms = int((time.perf_counter() - start_time) * 1000)

    # --- Step 7: Cache the response ---
    # A cache hit returned above, so a non-None key here always means a miss.
    if cache_key:
        await set_cached_response(
            cache_key,
            {
                "model": provider_response.model,
                "content": provider_response.content,
                "finish_reason": provider_response.finish_reason,
                "completion_tokens": provider_response.completion_tokens,
            },
        )

    # --- Step 8: Calculate savings ---
    tokens_saved = max(0, total_tokens_before - total_tokens_after)
    # If model was cascaded, factor in the price difference
    original_cost_per_token = get_cost_per_token(request.model, "input")
    actual_cost_per_token = get_cost_per_token(actual_model, "input")

    cost_before = total_tokens_before * original_cost_per_token
    cost_after = total_tokens_after * actual_cost_per_token
    cost_saved = max(0, cost_before - cost_after)

    # --- Step 9: Log to database ---
    async with async_session() as session:
        db_request = Request(
            provider=actual_provider_name,
            model=actual_model,
            customer_id="00000000-0000-0000-0000-000000000000",
            image_count=len(plans),
            # Sorted so the same set of reasons always yields the same string;
            # an unordered set would split identical combinations into
            # different groups in the usage breakdown.
            optimization_type=", ".join(sorted(set(optimizations_applied))) or "none",
            tokens_original_estimate=total_tokens_before,
            tokens_actual=provider_response.prompt_tokens,
            tokens_saved=tokens_saved,
            cost_original_estimate=cost_before,
            cost_actual=cost_after,
            cost_saved=cost_saved,
            response_tokens=provider_response.completion_tokens,
            latency_ms=latency_ms,
            optimization_details={
                "plans": [
                    {
                        "reasons": p.reasons,
                        "before": p.estimated_tokens_before,
                        "after": p.estimated_tokens_after,
                    }
                    for p in plans
                ],
                "model_cascaded": model_cascaded_to,
                "prompt_detail": prompt_detail,
            },
        )
        session.add(db_request)
        await session.commit()

    return ChatResponse(
        id=f"token0-{uuid.uuid4().hex[:12]}",
        model=provider_response.model,
        choices=[
            Choice(
                index=0,
                message=Message(role="assistant", content=provider_response.content),
                finish_reason=provider_response.finish_reason,
            )
        ],
        usage=UsageInfo(
            prompt_tokens=provider_response.prompt_tokens,
            completion_tokens=provider_response.completion_tokens,
            total_tokens=provider_response.total_tokens,
        ),
        token0=Token0Usage(
            original_prompt_tokens_estimate=total_tokens_before,
            optimized_prompt_tokens=provider_response.prompt_tokens,
            tokens_saved=tokens_saved,
            cost_saved_usd=round(cost_saved, 6),
            optimizations_applied=optimizations_applied,
            cache_hit=False,
            model_cascaded_to=model_cascaded_to,
        ),
    )
token0/api/v1/usage.py ADDED
@@ -0,0 +1,46 @@
1
+ """Usage and savings dashboard endpoints."""
2
+
3
+ from fastapi import APIRouter
4
+ from sqlalchemy import func, select
5
+
6
+ from token0.models.db import Request
7
+ from token0.models.request import UsageSummary
8
+ from token0.storage.postgres import async_session
9
+
10
# Router that carries the usage/savings endpoint defined in this module.
router = APIRouter()
11
+
12
+
13
@router.get("/usage", response_model=UsageSummary)
async def get_usage():
    """Return aggregate usage and savings figures over all logged requests.

    Runs two queries against the ``requests`` table: one for totals and
    averages, and one counting rows per ``optimization_type``.
    """
    totals_query = select(
        func.count(Request.id).label("total_requests"),
        func.coalesce(func.sum(Request.tokens_saved), 0).label("total_tokens_saved"),
        func.coalesce(func.sum(Request.cost_saved), 0.0).label("total_cost_saved"),
        func.coalesce(func.avg(Request.tokens_original_estimate), 0).label("avg_original"),
        func.coalesce(func.avg(Request.tokens_actual), 0).label("avg_actual"),
    )
    # Per-optimization-type request counts
    per_type_query = select(Request.optimization_type, func.count(Request.id)).group_by(
        Request.optimization_type
    )

    async with async_session() as session:
        totals = (await session.execute(totals_query)).one()
        per_type_rows = (await session.execute(per_type_query)).all()

    breakdown = {}
    for kind, hits in per_type_rows:
        breakdown[kind] = hits

    mean_before = 0
    if totals.avg_original:
        mean_before = float(totals.avg_original)
    mean_after = 0
    if totals.avg_actual:
        mean_after = float(totals.avg_actual)

    # Guard against division by zero when nothing was sent upstream.
    if mean_after > 0:
        ratio = mean_before / mean_after
    else:
        ratio = 0

    return UsageSummary(
        total_requests=totals.total_requests,
        total_tokens_saved=int(totals.total_tokens_saved),
        total_cost_saved_usd=round(float(totals.total_cost_saved), 4),
        avg_compression_ratio=round(ratio, 2),
        optimization_breakdown=breakdown,
    )
token0/cli.py ADDED
@@ -0,0 +1,46 @@
1
+ import argparse
2
+ import sys
3
+
4
+ import uvicorn
5
+
6
+
7
def main():
    """Entry point of the ``token0`` command-line interface.

    With no sub-command the help text is printed and the process exits with
    status 1. The ``serve`` sub-command starts uvicorn on ``token0.main:app``
    using the parsed host, port, reload and workers options.
    """
    cli = argparse.ArgumentParser(
        prog="token0",
        description="Token0 — Vision LLM cost optimization proxy",
    )
    commands = cli.add_subparsers(dest="command")

    serve = commands.add_parser("serve", help="Start the Token0 API server")
    # (flag, add_argument keyword arguments) for every option of `serve`
    serve_options = (
        ("--host", {"default": "0.0.0.0", "help": "Bind host (default: 0.0.0.0)"}),
        ("--port", {"type": int, "default": 8000, "help": "Bind port (default: 8000)"}),
        ("--reload", {"action": "store_true", "help": "Enable auto-reload for development"}),
        (
            "--workers",
            {"type": int, "default": 1, "help": "Number of worker processes (default: 1)"},
        ),
    )
    for flag, spec in serve_options:
        serve.add_argument(flag, **spec)

    parsed = cli.parse_args()
    chosen = parsed.command

    if chosen is None:
        cli.print_help()
        sys.exit(1)

    if chosen == "serve":
        server_kwargs = {
            "host": parsed.host,
            "port": parsed.port,
            "reload": parsed.reload,
            "workers": parsed.workers,
        }
        uvicorn.run("token0.main:app", **server_kwargs)


if __name__ == "__main__":
    main()
token0/config.py ADDED
@@ -0,0 +1,55 @@
1
+ from pydantic_settings import BaseSettings
2
+
3
+
4
class Settings(BaseSettings):
    """Application configuration; values can be supplied through a ``.env`` file."""

    # Storage mode: "lite" (SQLite + in-memory) or "full" (Postgres + Redis + S3)
    # Use "lite" for local dev/testing, "full" for production
    storage_mode: str = "lite"

    # Database — only needed in full mode
    database_url: str = "postgresql+asyncpg://token0:token0@localhost:5432/token0"

    # SQLite path — used in lite mode
    sqlite_path: str = "token0.db"

    # Redis — only needed in full mode
    redis_url: str = "redis://localhost:6379/0"

    # Object Storage — only needed in full mode
    s3_endpoint: str = "http://localhost:9000"
    s3_access_key: str = "minioadmin"
    s3_secret_key: str = "minioadmin"
    s3_bucket: str = "token0-images"

    # LLM Provider Keys (optional defaults — users can pass their own per-request)
    openai_api_key: str = ""
    anthropic_api_key: str = ""
    google_api_key: str = ""

    # Server
    host: str = "0.0.0.0"
    port: int = 8000
    log_level: str = "info"

    # Auth
    token0_master_key: str = "change-me-in-production"

    # Optimization defaults
    max_image_dimension: int = 1568  # Claude's max before auto-downscale
    jpeg_quality: int = 85
    text_density_threshold: float = 0.52  # Above this → OCR route instead of vision

    @property
    def is_lite(self) -> bool:
        """Whether ``storage_mode`` is exactly ``"lite"``."""
        lite_selected = self.storage_mode == "lite"
        return lite_selected

    @property
    def effective_database_url(self) -> str:
        """Database URL for the active mode: SQLite file in lite mode, else ``database_url``."""
        if not self.is_lite:
            return self.database_url
        return f"sqlite+aiosqlite:///{self.sqlite_path}"

    model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}


# Module-level instance shared by importers of this module.
settings = Settings()
token0/main.py ADDED
@@ -0,0 +1,52 @@
1
+ import logging
2
+ from contextlib import asynccontextmanager
3
+
4
+ from fastapi import FastAPI
5
+
6
+ from token0.api.v1.chat import router as chat_router
7
+ from token0.api.v1.usage import router as usage_router
8
+ from token0.config import settings
9
+ from token0.storage.postgres import close_db, init_db
10
+ from token0.storage.redis import close_redis, init_redis
11
+
12
# Logger named "token0", used for the startup messages in this module.
logger = logging.getLogger("token0")
13
+
14
+
15
def _redact_url(url: str) -> str:
    """Return ``url`` with any password in its userinfo replaced by ``***``."""
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(url)
    except ValueError:
        # Never fall back to echoing a URL that could not be inspected.
        return "<unparseable url>"
    userinfo, at_sign, host_port = parts.netloc.rpartition("@")
    if not at_sign or ":" not in userinfo:
        # No credentials, or a user name without a password: nothing to hide.
        return url
    username = userinfo.split(":", 1)[0]
    return parts._replace(netloc=f"{username}:***@{host_port}").geturl()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: log the configuration, open and close backends.

    Before the ``yield`` the active storage mode is logged and the database
    and Redis layers are initialized. After it both are closed; the cleanup
    sits in ``finally`` blocks so an error during shutdown (or one failing
    close call) cannot skip the other close call.

    Connection URLs are logged with their password redacted so credentials
    do not end up in log output.
    """
    mode = settings.storage_mode
    logger.info("Starting Token0 in %s mode", mode)
    if settings.is_lite:
        logger.info(" Database: SQLite (%s)", settings.sqlite_path)
        logger.info(" Cache: in-memory")
        logger.info(" Storage: local filesystem")
        logger.info(" Tip: Set STORAGE_MODE=full for production (Postgres + Redis + S3)")
    else:
        logger.info(" Database: %s", _redact_url(settings.database_url))
        logger.info(" Cache: %s", _redact_url(settings.redis_url))
        logger.info(" Storage: %s", _redact_url(settings.s3_endpoint))
    await init_db()
    await init_redis()
    try:
        yield
    finally:
        try:
            await close_db()
        finally:
            await close_redis()
33
+
34
+
35
# The FastAPI application object; `lifespan` is passed in as its
# startup/shutdown hook.
app = FastAPI(
    title="Token0",
    description="Open-source API proxy that makes vision LLM calls 5-10x cheaper",
    version="0.1.0",
    lifespan=lifespan,
)

# Both routers are mounted under the /v1 prefix.
app.include_router(chat_router, prefix="/v1")
app.include_router(usage_router, prefix="/v1")
44
+
45
+
46
@app.get("/health")
async def health():
    """Report a static OK status together with the configured storage mode."""
    payload = {"status": "ok", "service": "token0"}
    payload["storage_mode"] = settings.storage_mode
    return payload
File without changes
token0/models/db.py ADDED
@@ -0,0 +1,85 @@
1
+ import uuid
2
+ from datetime import datetime
3
+
4
+ from sqlalchemy import JSON, DateTime, Float, Integer, String, Text, func
5
+ from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
6
+
7
+
8
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this module inherit from."""
10
+
11
+
12
class Customer(Base):
    """ORM model mapped to the ``customers`` table."""

    __tablename__ = "customers"

    # Primary key: a random UUID4 rendered as a 36-character string,
    # generated client-side when no id is supplied.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    name: Mapped[str] = mapped_column(String(255))
    # Unique and indexed. Presumably a hash of the customer's API key rather
    # than the key itself — confirm against the code that writes this column.
    api_key_hash: Mapped[str] = mapped_column(String(128), unique=True, index=True)
    # Customers can pass their own provider keys, stored encrypted
    # NOTE(review): the column is plain JSON and nothing in this model
    # encrypts it; encryption would have to happen before assignment — confirm.
    provider_keys: Mapped[dict | None] = mapped_column(JSON, nullable=True)
    # Filled in by the database (server-side now()) on insert.
    created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
    # New rows default to active.
    is_active: Mapped[bool] = mapped_column(default=True)
22
+
23
+
24
class Request(Base):
    """ORM model mapped to the ``requests`` table: one row per logged request.

    Holds the provider/model used, optional image metadata, the optimization
    that was applied, and token and cost accounting.
    """

    __tablename__ = "requests"

    # Primary key: a random UUID4 rendered as a 36-character string.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    customer_id: Mapped[str] = mapped_column(String(36), index=True)
    provider: Mapped[str] = mapped_column(String(50))  # openai, anthropic, google
    model: Mapped[str] = mapped_column(String(100))  # gpt-4o, claude-sonnet-4-6, etc.

    # Image metadata
    original_width: Mapped[int | None] = mapped_column(Integer, nullable=True)
    original_height: Mapped[int | None] = mapped_column(Integer, nullable=True)
    original_size_bytes: Mapped[int | None] = mapped_column(Integer, nullable=True)
    image_count: Mapped[int] = mapped_column(Integer, default=0)

    # Optimization applied.
    # The chat endpoint stores a comma-joined list of every distinct
    # optimization reason here (or "cache_hit" / "none"). That string has no
    # fixed upper bound, so the column is unbounded text: a String(50) column
    # rejects longer values on backends that enforce the length.
    optimization_type: Mapped[str] = mapped_column(Text)
    optimized_width: Mapped[int | None] = mapped_column(Integer, nullable=True)
    optimized_height: Mapped[int | None] = mapped_column(Integer, nullable=True)
    detail_mode: Mapped[str | None] = mapped_column(String(20), nullable=True)  # low, high, auto

    # Token accounting
    tokens_original_estimate: Mapped[int] = mapped_column(Integer)  # what it would have cost
    tokens_actual: Mapped[int] = mapped_column(Integer)  # what it actually cost
    tokens_saved: Mapped[int] = mapped_column(Integer)

    # Cost accounting (USD)
    cost_original_estimate: Mapped[float] = mapped_column(Float)
    cost_actual: Mapped[float] = mapped_column(Float)
    cost_saved: Mapped[float] = mapped_column(Float)

    # Prompt/response metadata
    prompt_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    response_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
    latency_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)

    # Full optimization decision log
    optimization_details: Mapped[dict | None] = mapped_column(JSON, nullable=True)

    # Filled in by the database (server-side now()) on insert.
    created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
65
+
66
+
67
class OptimizationProfile(Base):
    """Learned optimization profiles — Month 3 feature.
    Stores what works best per customer + content type + task type.

    Mapped to the ``optimization_profiles`` table.
    """

    __tablename__ = "optimization_profiles"

    # Primary key: a random UUID4 rendered as a 36-character string.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # Indexed; no foreign-key constraint is declared on this column.
    customer_id: Mapped[str] = mapped_column(String(36), index=True)
    content_type: Mapped[str] = mapped_column(String(100))  # invoice, receipt, screenshot, photo
    task_type: Mapped[str] = mapped_column(String(100))  # classify, extract, describe, ocr
    # The recommendation itself; the three optional columns below refine it.
    recommended_optimization: Mapped[str] = mapped_column(String(50))
    recommended_detail_mode: Mapped[str | None] = mapped_column(String(20), nullable=True)
    recommended_max_dimension: Mapped[int | None] = mapped_column(Integer, nullable=True)
    avg_quality_score: Mapped[float | None] = mapped_column(Float, nullable=True)
    # Defaults to 0 for new rows.
    sample_count: Mapped[int] = mapped_column(Integer, default=0)
    # Set by the database on insert and refreshed with now() on every update.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime, server_default=func.now(), onupdate=func.now()
    )