winebox 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
winebox/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """WineBox - Wine Cellar Management Application."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.1.3"
winebox/config.py CHANGED
@@ -35,6 +35,10 @@ class Settings(BaseSettings):
35
35
  # OCR
36
36
  tesseract_cmd: str | None = None # Use system default if None
37
37
 
38
+ # Claude Vision (for wine label scanning)
39
+ anthropic_api_key: str | None = None # Set WINEBOX_ANTHROPIC_API_KEY or ANTHROPIC_API_KEY
40
+ use_claude_vision: bool = True # Fall back to Tesseract if False or no API key
41
+
38
42
  # Authentication
39
43
  secret_key: str = generate_secret_key() # Override with WINEBOX_SECRET_KEY env var
40
44
  auth_enabled: bool = True # Set to False to disable authentication
winebox/routers/wines.py CHANGED
@@ -1,5 +1,6 @@
1
1
  """Wine management endpoints."""
2
2
 
3
+ import logging
3
4
  from typing import Annotated
4
5
  from uuid import UUID
5
6
 
@@ -14,14 +15,117 @@ from winebox.schemas.wine import WineCreate, WineResponse, WineUpdate, WineWithI
14
15
  from winebox.services.auth import RequireAuth
15
16
  from winebox.services.image_storage import ImageStorageService
16
17
  from winebox.services.ocr import OCRService
18
+ from winebox.services.vision import ClaudeVisionService
17
19
  from winebox.services.wine_parser import WineParserService
18
20
 
21
+ logger = logging.getLogger(__name__)
22
+
19
23
  router = APIRouter()
20
24
 
21
25
  # Service dependencies
22
26
  image_storage = ImageStorageService()
23
27
  ocr_service = OCRService()
24
28
  wine_parser = WineParserService()
29
+ vision_service = ClaudeVisionService()
30
+
31
+
def get_media_type(filename: str | None) -> str:
    """Return the image MIME type implied by an upload's filename.

    Args:
        filename: Client-supplied filename; may be ``None`` or empty.

    Returns:
        The MIME type for a recognised extension (jpg, jpeg, png, gif, webp,
        matched case-insensitively), otherwise ``"image/jpeg"``.
    """
    default = "image/jpeg"
    if not filename:
        return default

    # rpartition leaves `dot` empty when the name contains no "." at all, so a
    # bare name such as "png" is not mistaken for an extension.
    _, dot, ext = filename.rpartition(".")
    if not dot:
        return default

    return {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
        "webp": "image/webp",
    }.get(ext.lower(), default)
44
+
45
+
@router.post("/scan")
async def scan_label(
    _: RequireAuth,
    front_label: Annotated[UploadFile, File(description="Front label image")],
    back_label: Annotated[UploadFile | None, File(description="Back label image")] = None,
) -> dict:
    """Scan wine label images and extract text without creating a wine record.

    Uses Claude Vision for intelligent label analysis when available,
    falls back to Tesseract OCR otherwise.

    Args:
        _: Authentication dependency; its value is unused.
        front_label: Uploaded front label image (required).
        back_label: Uploaded back label image (optional).

    Returns:
        A dict with three keys: ``parsed`` (the extracted wine fields),
        ``ocr`` (``front_label_text`` / ``back_label_text``) and ``method``
        (``"claude_vision"`` or ``"tesseract"``).
    """
    parsed_fields = (
        "name",
        "winery",
        "vintage",
        "grape_variety",
        "region",
        "country",
        "alcohol_percentage",
    )

    # Read the raw bytes once, then rewind each upload with seek(0).
    front_data = await front_label.read()
    await front_label.seek(0)

    back_data = None
    if back_label and back_label.filename:
        back_data = await back_label.read()
        await back_label.seek(0)

    # Try Claude Vision first
    if vision_service.is_available():
        logger.info("Using Claude Vision for label analysis")
        try:
            result = await vision_service.analyze_labels(
                front_image_data=front_data,
                back_image_data=back_data,
                front_media_type=get_media_type(front_label.filename),
                back_media_type=get_media_type(back_label.filename if back_label else None),
            )

            parsed = {field: result.get(field) for field in parsed_fields}
            raw_text = result.get("raw_text", "")

            # analyze_labels reports its own failures by returning an
            # all-empty result instead of raising, so an empty result must be
            # treated as a failure here or the Tesseract fallback is never used.
            if raw_text or any(value is not None for value in parsed.values()):
                return {
                    "parsed": parsed,
                    "ocr": {
                        "front_label_text": raw_text,
                        "back_label_text": result.get("back_label_text"),
                    },
                    "method": "claude_vision",
                }
            logger.warning("Claude Vision returned no usable data, falling back to Tesseract")
        except Exception as e:
            logger.warning(f"Claude Vision failed, falling back to Tesseract: {e}")

    # Fall back to Tesseract OCR
    logger.info("Using Tesseract OCR for label analysis")
    front_text = await ocr_service.extract_text_from_bytes(front_data)

    back_text = None
    if back_data:
        back_text = await ocr_service.extract_text_from_bytes(back_data)

    # Parse wine details from the combined OCR text
    combined_text = f"{front_text}\n{back_text}" if back_text else front_text
    parsed_data = wine_parser.parse(combined_text)

    return {
        "parsed": {field: parsed_data.get(field) for field in parsed_fields},
        "ocr": {
            "front_label_text": front_text,
            "back_label_text": back_text,
        },
        "method": "tesseract",
    }
25
129
 
26
130
 
27
131
  @router.post("/checkin", response_model=WineWithInventory, status_code=status.HTTP_201_CREATED)
@@ -43,26 +147,58 @@ async def checkin_wine(
43
147
  """Check in wine bottles to the cellar.
44
148
 
45
149
  Upload front (required) and back (optional) label images.
46
- OCR will extract text and attempt to identify wine details.
150
+ Uses Claude Vision for intelligent label analysis when available.
47
151
  You can override any auto-detected values.
48
152
  """
153
+ # Read image data for analysis
154
+ front_data = await front_label.read()
155
+ await front_label.seek(0)
156
+
157
+ back_data = None
158
+ if back_label and back_label.filename:
159
+ back_data = await back_label.read()
160
+ await back_label.seek(0)
161
+
49
162
  # Save images
50
163
  front_image_path = await image_storage.save_image(front_label)
51
164
  back_image_path = None
52
165
  if back_label and back_label.filename:
53
166
  back_image_path = await image_storage.save_image(back_label)
54
167
 
55
- # Extract text via OCR
56
- front_text = await ocr_service.extract_text(front_image_path)
168
+ # Try Claude Vision first
169
+ parsed_data = {}
170
+ front_text = ""
57
171
  back_text = None
58
- if back_image_path:
59
- back_text = await ocr_service.extract_text(back_image_path)
60
172
 
61
- # Parse wine details from OCR text
62
- combined_text = front_text
63
- if back_text:
64
- combined_text = f"{front_text}\n{back_text}"
65
- parsed_data = wine_parser.parse(combined_text)
173
+ if vision_service.is_available():
174
+ logger.info("Using Claude Vision for checkin analysis")
175
+ try:
176
+ front_media_type = get_media_type(front_label.filename)
177
+ back_media_type = get_media_type(back_label.filename if back_label else None)
178
+
179
+ result = await vision_service.analyze_labels(
180
+ front_image_data=front_data,
181
+ back_image_data=back_data,
182
+ front_media_type=front_media_type,
183
+ back_media_type=back_media_type,
184
+ )
185
+ parsed_data = result
186
+ front_text = result.get("raw_text", "")
187
+ back_text = result.get("back_label_text")
188
+ except Exception as e:
189
+ logger.warning(f"Claude Vision failed, falling back to Tesseract: {e}")
190
+
191
+ # Fall back to Tesseract if needed
192
+ if not parsed_data.get("name"):
193
+ logger.info("Using Tesseract OCR for checkin analysis")
194
+ front_text = await ocr_service.extract_text(front_image_path)
195
+ if back_image_path:
196
+ back_text = await ocr_service.extract_text(back_image_path)
197
+
198
+ combined_text = front_text
199
+ if back_text:
200
+ combined_text = f"{front_text}\n{back_text}"
201
+ parsed_data = wine_parser.parse(combined_text)
66
202
 
67
203
  # Use provided values or fall back to parsed values
68
204
  wine_name = name or parsed_data.get("name") or "Unknown Wine"
winebox/services/ocr.py CHANGED
@@ -1,5 +1,6 @@
1
1
  """OCR service for extracting text from wine label images."""
2
2
 
3
+ import io
3
4
  import logging
4
5
  from pathlib import Path
5
6
 
@@ -66,6 +67,42 @@ class OCRService:
66
67
  logger.error(f"OCR extraction failed: {e}")
67
68
  return ""
68
69
 
async def extract_text_from_bytes(self, image_data: bytes) -> str:
    """Extract text from image bytes without saving to disk.

    Args:
        image_data: Raw image data as bytes.

    Returns:
        Extracted text from the image, stripped of surrounding whitespace.
        Returns an empty string if pytesseract is not installed or if
        decoding/OCR fails for any reason (the error is logged).
    """
    import asyncio

    try:
        import pytesseract
    except ImportError:
        logger.error("pytesseract is not installed")
        return ""

    def _run_ocr() -> str:
        # The context manager releases the decoded image's resources even
        # when conversion or OCR raises.
        with Image.open(io.BytesIO(image_data)) as source:
            # Preprocess for better OCR results: convert to grayscale.
            grey = source if source.mode == "L" else source.convert("L")
            text = pytesseract.image_to_string(
                grey,
                lang="eng",
                config="--psm 6",  # Assume uniform block of text
            )
        return text.strip()

    try:
        # Image decoding and Tesseract are blocking calls; run them in a
        # worker thread so the event loop stays responsive.
        return await asyncio.to_thread(_run_ocr)
    except Exception as e:
        logger.error(f"OCR extraction failed: {e}")
        return ""
105
+
69
106
  async def extract_text_with_confidence(
70
107
  self, image_path: str | Path
71
108
  ) -> tuple[str, float]:
@@ -0,0 +1,251 @@
1
+ """Claude Vision service for wine label analysis."""
2
+
3
+ import base64
4
+ import json
5
+ import logging
6
+ import os
7
+ from typing import Any
8
+
9
+ from winebox.config import settings
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ WINE_ANALYSIS_PROMPT = """Analyze this wine label image and extract the following information.
14
+ Return ONLY a valid JSON object with these fields (use null for any field you cannot determine):
15
+
16
+ {
17
+ "name": "The wine name/title",
18
+ "winery": "The winery or producer name",
19
+ "vintage": 2020,
20
+ "grape_variety": "The grape variety (e.g., Cabernet Sauvignon, Chardonnay)",
21
+ "region": "The wine region (e.g., Napa Valley, Bordeaux)",
22
+ "country": "The country of origin",
23
+ "alcohol_percentage": 13.5,
24
+ "raw_text": "All readable text from the label, preserving line breaks"
25
+ }
26
+
27
+ Important:
28
+ - vintage should be a number (year) or null
29
+ - alcohol_percentage should be a number or null
30
+ - Extract ALL visible text for raw_text, including small print
31
+ - If you see multiple wines or labels, focus on the main/primary one
32
+ - Be thorough - wine labels often have text in multiple locations"""
33
+
34
+
class ClaudeVisionService:
    """Service for analyzing wine labels using Claude's vision capabilities.

    The public coroutines never raise: any failure (missing package or API
    key, API error, unparseable reply) is logged and reported as the all-empty
    result from ``_empty_result``.
    """

    # Model and response budget shared by every request this service sends.
    MODEL = "claude-sonnet-4-20250514"
    MAX_TOKENS = 1024

    # Wine attributes copied from Claude's JSON reply into every result.
    _FIELDS = (
        "name",
        "winery",
        "vintage",
        "grape_variety",
        "region",
        "country",
        "alcohol_percentage",
    )

    def __init__(self) -> None:
        """Initialize the Claude Vision service."""
        self._client = None

    @staticmethod
    def _api_key() -> str | None:
        """Return the API key from settings, else from ANTHROPIC_API_KEY."""
        return settings.anthropic_api_key or os.getenv("ANTHROPIC_API_KEY")

    @property
    def client(self):
        """Lazy-load the Anthropic client.

        Raises:
            ImportError: If the ``anthropic`` package is not installed.
            ValueError: If no API key is configured.
        """
        if self._client is None:
            try:
                import anthropic
            except ImportError:
                logger.error("anthropic package is not installed")
                raise

            api_key = self._api_key()
            if not api_key:
                raise ValueError("No Anthropic API key configured")

            self._client = anthropic.Anthropic(api_key=api_key)
        return self._client

    def is_available(self) -> bool:
        """Check if Claude Vision is available (API key set and feature enabled)."""
        try:
            return bool(self._api_key()) and settings.use_claude_vision
        except Exception:
            return False

    @staticmethod
    def _image_block(image_data: bytes, media_type: str) -> dict[str, Any]:
        """Build a base64 image content block for a Messages API request."""
        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": media_type,
                "data": base64.standard_b64encode(image_data).decode("utf-8"),
            },
        }

    @staticmethod
    def _extract_json(response_text: str) -> dict[str, Any]:
        """Parse the JSON object contained in Claude's reply text.

        Raises:
            json.JSONDecodeError: If no valid JSON can be located.
            ValueError: If the JSON is valid but not an object.
        """
        # Claude may wrap the JSON in a markdown code block.
        if "```json" in response_text:
            response_text = response_text.split("```json")[1].split("```")[0]
        elif "```" in response_text:
            response_text = response_text.split("```")[1].split("```")[0]

        candidate = response_text.strip()
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            # Claude may also surround unfenced JSON with prose; retry on the
            # outermost brace-delimited span before giving up.
            start, end = candidate.find("{"), candidate.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(candidate[start:end + 1])

        if not isinstance(parsed, dict):
            raise ValueError("Claude response is not a JSON object")
        return parsed

    @classmethod
    def _normalize(cls, result: dict[str, Any]) -> dict[str, Any]:
        """Keep only the expected fields, defaulting missing ones to None."""
        normalized: dict[str, Any] = {key: result.get(key) for key in cls._FIELDS}
        # A JSON null for raw_text would otherwise leak through as None.
        normalized["raw_text"] = result.get("raw_text") or ""
        return normalized

    async def _request(self, content: list[dict[str, Any]]) -> dict[str, Any]:
        """Send one user message to Claude and return the parsed JSON reply."""
        import asyncio

        # The client built by `client` is synchronous; run the HTTP call in a
        # worker thread so it does not block the event loop.
        message = await asyncio.to_thread(
            self.client.messages.create,
            model=self.MODEL,
            max_tokens=self.MAX_TOKENS,
            messages=[{"role": "user", "content": content}],
        )

        response_text = message.content[0].text
        try:
            return self._extract_json(response_text)
        except json.JSONDecodeError:
            logger.debug(f"Response was: {response_text}")
            raise

    async def analyze_label(
        self,
        image_data: bytes,
        media_type: str = "image/jpeg"
    ) -> dict[str, Any]:
        """Analyze a wine label image using Claude Vision.

        Args:
            image_data: Raw image data as bytes.
            media_type: MIME type of the image (image/jpeg, image/png, etc.)

        Returns:
            Dictionary with parsed wine information, or the empty result if
            the analysis fails.
        """
        try:
            content = [
                self._image_block(image_data, media_type),
                {"type": "text", "text": WINE_ANALYSIS_PROMPT},
            ]
            return self._normalize(await self._request(content))
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse Claude response as JSON: {e}")
            return self._empty_result()
        except Exception as e:
            logger.error(f"Claude Vision analysis failed: {e}")
            return self._empty_result()

    async def analyze_labels(
        self,
        front_image_data: bytes,
        back_image_data: bytes | None = None,
        front_media_type: str = "image/jpeg",
        back_media_type: str = "image/jpeg",
    ) -> dict[str, Any]:
        """Analyze front and back wine label images.

        Args:
            front_image_data: Front label image data.
            back_image_data: Optional back label image data.
            front_media_type: MIME type of front image.
            back_media_type: MIME type of back image.

        Returns:
            Combined analysis results, or the empty result if the analysis
            fails. ``front_label_text`` mirrors ``raw_text`` and
            ``back_label_text`` is always None because text from both labels
            is combined in ``raw_text``.
        """
        try:
            # With one image the prompt follows it directly; with two, each
            # image gets a caption and the adapted prompt comes last.
            content = [
                self._image_block(front_image_data, front_media_type),
                {
                    "type": "text",
                    "text": "Front label:" if back_image_data else WINE_ANALYSIS_PROMPT,
                },
            ]

            if back_image_data:
                content.extend([
                    self._image_block(back_image_data, back_media_type),
                    {"type": "text", "text": "Back label:"},
                    {
                        "type": "text",
                        "text": WINE_ANALYSIS_PROMPT.replace(
                            "this wine label image",
                            "these wine label images (front and back)"
                        ),
                    },
                ])

            combined = self._normalize(await self._request(content))
            combined["front_label_text"] = combined["raw_text"]
            combined["back_label_text"] = None  # Combined in raw_text
            return combined
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse Claude response as JSON: {e}")
            return self._empty_result()
        except Exception as e:
            logger.error(f"Claude Vision analysis failed: {e}")
            return self._empty_result()

    def _empty_result(self) -> dict[str, Any]:
        """Return an empty result dictionary."""
        empty: dict[str, Any] = {key: None for key in self._FIELDS}
        empty["raw_text"] = ""
        empty["front_label_text"] = ""
        empty["back_label_text"] = None
        return empty