PyPI - winebox - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

winebox 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

winebox/__init__.py +1 -1
winebox/config.py +4 -0
winebox/routers/wines.py +146 -10
winebox/services/ocr.py +37 -0
winebox/services/vision.py +251 -0
winebox/static/css/style.css +344 -0
winebox/static/favicon.svg +22 -0
winebox/static/index.html +75 -1
winebox/static/js/app.js +299 -5
{winebox-0.1.2.dist-info → winebox-0.1.3.dist-info}/METADATA +34 -1
{winebox-0.1.2.dist-info → winebox-0.1.3.dist-info}/RECORD +14 -12
{winebox-0.1.2.dist-info → winebox-0.1.3.dist-info}/WHEEL +0 -0
{winebox-0.1.2.dist-info → winebox-0.1.3.dist-info}/entry_points.txt +0 -0
{winebox-0.1.2.dist-info → winebox-0.1.3.dist-info}/licenses/LICENSE +0 -0

winebox/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """WineBox - Wine Cellar Management Application."""
-__version__ = "0.1.0"
+__version__ = "0.1.3"

winebox/config.py CHANGED Viewed

@@ -35,6 +35,10 @@ class Settings(BaseSettings):
     # OCR
     tesseract_cmd: str | None = None  # Use system default if None
+    # Claude Vision (for wine label scanning)
+    anthropic_api_key: str | None = None  # Set WINEBOX_ANTHROPIC_API_KEY or ANTHROPIC_API_KEY
+    use_claude_vision: bool = True  # Fall back to Tesseract if False or no API key
     # Authentication
     secret_key: str = generate_secret_key()  # Override with WINEBOX_SECRET_KEY env var
     auth_enabled: bool = True  # Set to False to disable authentication

winebox/routers/wines.py CHANGED Viewed

@@ -1,5 +1,6 @@
 """Wine management endpoints."""
+import logging
 from typing import Annotated
 from uuid import UUID
@@ -14,14 +15,117 @@ from winebox.schemas.wine import WineCreate, WineResponse, WineUpdate, WineWithI
 from winebox.services.auth import RequireAuth
 from winebox.services.image_storage import ImageStorageService
 from winebox.services.ocr import OCRService
+from winebox.services.vision import ClaudeVisionService
 from winebox.services.wine_parser import WineParserService
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
 router = APIRouter()
 # Service dependencies
+# Each service below is instantiated once, when this module is imported,
+# and the same instances are used by the endpoint functions in this module.
 image_storage = ImageStorageService()
 ocr_service = OCRService()
 wine_parser = WineParserService()
+vision_service = ClaudeVisionService()
+def get_media_type(filename: str | None) -> str:
+    """Return the image MIME type implied by a filename's extension.
+    Args:
+        filename: Name of the uploaded file; may be None or empty.
+    Returns:
+        "image/jpeg", "image/png", "image/gif" or "image/webp" for a
+        recognised extension (matched case-insensitively), and
+        "image/jpeg" for anything else, including a missing filename or a
+        name that has no extension at all.
+    """
+    if not filename:
+        return "image/jpeg"
+    # rpartition leaves `dot` empty when the name contains no "." at all, so a
+    # bare name such as "png" is not mistaken for a ".png" extension.
+    _, dot, ext = filename.lower().rpartition(".")
+    if not dot:
+        return "image/jpeg"
+    return {
+        "jpg": "image/jpeg",
+        "jpeg": "image/jpeg",
+        "png": "image/png",
+        "gif": "image/gif",
+        "webp": "image/webp",
+    }.get(ext, "image/jpeg")
+@router.post("/scan")
+async def scan_label(
+    _: RequireAuth,
+    front_label: Annotated[UploadFile, File(description="Front label image")],
+    back_label: Annotated[UploadFile | None, File(description="Back label image")] = None,
+) -> dict:
+    """Scan wine label images and extract text without creating a wine record.
+    Uses Claude Vision for intelligent label analysis when available,
+    falls back to Tesseract OCR when Vision is unavailable, raises, or
+    returns no label data at all.
+    Args:
+        _: Authentication dependency; its value is not used.
+        front_label: Uploaded front label image (required).
+        back_label: Uploaded back label image; ignored when absent or unnamed.
+    Returns:
+        A dict with three keys: "parsed" (the wine fields), "ocr" (front and
+        back label text) and "method" ("claude_vision" or "tesseract").
+    """
+    # Keys reported under "parsed", in response order.
+    wine_fields = (
+        "name",
+        "winery",
+        "vintage",
+        "grape_variety",
+        "region",
+        "country",
+        "alcohol_percentage",
+    )
+    # Read image data
+    front_data = await front_label.read()
+    await front_label.seek(0)
+    back_data = None
+    if back_label and back_label.filename:
+        back_data = await back_label.read()
+        await back_label.seek(0)
+    # Try Claude Vision first
+    if vision_service.is_available():
+        logger.info("Using Claude Vision for label analysis")
+        try:
+            result = await vision_service.analyze_labels(
+                front_image_data=front_data,
+                back_image_data=back_data,
+                front_media_type=get_media_type(front_label.filename),
+                back_media_type=get_media_type(back_label.filename if back_label else None),
+            )
+        except Exception as e:
+            logger.warning(f"Claude Vision failed, falling back to Tesseract: {e}")
+        else:
+            parsed = {field: result.get(field) for field in wine_fields}
+            # analyze_labels reports its own failures by returning an all-empty
+            # result instead of raising, so an empty result must also trigger
+            # the Tesseract fallback rather than being returned to the client.
+            if any(parsed.values()) or result.get("raw_text"):
+                return {
+                    "parsed": parsed,
+                    "ocr": {
+                        "front_label_text": result.get("raw_text", ""),
+                        "back_label_text": result.get("back_label_text"),
+                    },
+                    "method": "claude_vision",
+                }
+            logger.warning("Claude Vision returned no label data, falling back to Tesseract")
+    # Fall back to Tesseract OCR
+    logger.info("Using Tesseract OCR for label analysis")
+    front_text = await ocr_service.extract_text_from_bytes(front_data)
+    back_text = None
+    if back_data:
+        back_text = await ocr_service.extract_text_from_bytes(back_data)
+    # Parse wine details from OCR text
+    combined_text = front_text
+    if back_text:
+        combined_text = f"{front_text}\n{back_text}"
+    parsed_data = wine_parser.parse(combined_text)
+    return {
+        "parsed": {field: parsed_data.get(field) for field in wine_fields},
+        "ocr": {
+            "front_label_text": front_text,
+            "back_label_text": back_text,
+        },
+        "method": "tesseract",
+    }
 @router.post("/checkin", response_model=WineWithInventory, status_code=status.HTTP_201_CREATED)
@@ -43,26 +147,58 @@ async def checkin_wine(
     """Check in wine bottles to the cellar.
     Upload front (required) and back (optional) label images.
-    OCR will extract text and attempt to identify wine details.
+    Uses Claude Vision for intelligent label analysis when available.
     You can override any auto-detected values.
     """
+    # Read image data for analysis
+    front_data = await front_label.read()
+    await front_label.seek(0)
+    back_data = None
+    if back_label and back_label.filename:
+        back_data = await back_label.read()
+        await back_label.seek(0)
     # Save images
     front_image_path = await image_storage.save_image(front_label)
     back_image_path = None
     if back_label and back_label.filename:
         back_image_path = await image_storage.save_image(back_label)
-    # Extract text via OCR
-    front_text = await ocr_service.extract_text(front_image_path)
+    # Try Claude Vision first
+    parsed_data = {}
+    front_text = ""
     back_text = None
-    if back_image_path:
-        back_text = await ocr_service.extract_text(back_image_path)
-    # Parse wine details from OCR text
-    combined_text = front_text
-    if back_text:
-        combined_text = f"{front_text}\n{back_text}"
-    parsed_data = wine_parser.parse(combined_text)
+    if vision_service.is_available():
+        logger.info("Using Claude Vision for checkin analysis")
+        try:
+            front_media_type = get_media_type(front_label.filename)
+            back_media_type = get_media_type(back_label.filename if back_label else None)
+            result = await vision_service.analyze_labels(
+                front_image_data=front_data,
+                back_image_data=back_data,
+                front_media_type=front_media_type,
+                back_media_type=back_media_type,
+            )
+            parsed_data = result
+            front_text = result.get("raw_text", "")
+            back_text = result.get("back_label_text")
+        except Exception as e:
+            logger.warning(f"Claude Vision failed, falling back to Tesseract: {e}")
+    # Fall back to Tesseract if needed
+    if not parsed_data.get("name"):
+        logger.info("Using Tesseract OCR for checkin analysis")
+        front_text = await ocr_service.extract_text(front_image_path)
+        if back_image_path:
+            back_text = await ocr_service.extract_text(back_image_path)
+        combined_text = front_text
+        if back_text:
+            combined_text = f"{front_text}\n{back_text}"
+        parsed_data = wine_parser.parse(combined_text)
     # Use provided values or fall back to parsed values
     wine_name = name or parsed_data.get("name") or "Unknown Wine"

winebox/services/ocr.py CHANGED Viewed

@@ -1,5 +1,6 @@
 """OCR service for extracting text from wine label images."""
+import io
 import logging
 from pathlib import Path
@@ -66,6 +67,42 @@ class OCRService:
             logger.error(f"OCR extraction failed: {e}")
             return ""
+    async def extract_text_from_bytes(self, image_data: bytes) -> str:
+        """Run Tesseract OCR on an in-memory image.
+        The bytes are decoded directly; nothing is written to disk.
+        Args:
+            image_data: Raw image data as bytes.
+        Returns:
+            The recognised text with surrounding whitespace stripped, or an
+            empty string when pytesseract is not installed or OCR fails.
+        """
+        extracted = ""
+        try:
+            import pytesseract
+            label_image = Image.open(io.BytesIO(image_data))
+            # OCR runs on a grayscale ("L" mode) image; convert only when needed.
+            grayscale = label_image if label_image.mode == "L" else label_image.convert("L")
+            # --psm 6: assume a single uniform block of text.
+            recognised = pytesseract.image_to_string(grayscale, lang="eng", config="--psm 6")
+            extracted = recognised.strip()
+        except ImportError:
+            logger.error("pytesseract is not installed")
+        except Exception as e:
+            logger.error(f"OCR extraction failed: {e}")
+        return extracted
     async def extract_text_with_confidence(
         self, image_path: str | Path
     ) -> tuple[str, float]:

winebox/services/vision.py ADDED Viewed

@@ -0,0 +1,251 @@
+"""Claude Vision service for wine label analysis."""
+import asyncio
+import base64
+import json
+import logging
+import os
+from typing import Any
+from winebox.config import settings
+logger = logging.getLogger(__name__)
+# Instruction text sent to Claude as the text part of a label-analysis request.
+# It asks for a single JSON object; the keys listed here are the ones that
+# ClaudeVisionService reads back out of the parsed reply.
+WINE_ANALYSIS_PROMPT = """Analyze this wine label image and extract the following information.
+Return ONLY a valid JSON object with these fields (use null for any field you cannot determine):
+{
+    "name": "The wine name/title",
+    "winery": "The winery or producer name",
+    "vintage": 2020,
+    "grape_variety": "The grape variety (e.g., Cabernet Sauvignon, Chardonnay)",
+    "region": "The wine region (e.g., Napa Valley, Bordeaux)",
+    "country": "The country of origin",
+    "alcohol_percentage": 13.5,
+    "raw_text": "All readable text from the label, preserving line breaks"
+}
+Important:
+- vintage should be a number (year) or null
+- alcohol_percentage should be a number or null
+- Extract ALL visible text for raw_text, including small print
+- If you see multiple wines or labels, focus on the main/primary one
+- Be thorough - wine labels often have text in multiple locations"""
+class ClaudeVisionService:
+    """Service for analyzing wine labels using Claude's vision capabilities.
+    The Anthropic client is created lazily on first use. The analysis methods
+    log failures and report them as an all-empty result (see _empty_result)
+    rather than raising.
+    """
+    # Model and response budget used for every label-analysis request.
+    MODEL = "claude-sonnet-4-20250514"
+    MAX_TOKENS = 1024
+    # Wine fields copied from Claude's JSON reply into the returned dict.
+    _WINE_FIELDS = (
+        "name",
+        "winery",
+        "vintage",
+        "grape_variety",
+        "region",
+        "country",
+        "alcohol_percentage",
+    )
+    def __init__(self) -> None:
+        """Initialize the service without creating the Anthropic client yet."""
+        self._client = None
+    @property
+    def client(self):
+        """Return the Anthropic client, creating it on first access.
+        Raises:
+            ImportError: If the anthropic package is not installed.
+            ValueError: If no API key is found in settings or in the
+                ANTHROPIC_API_KEY environment variable.
+        """
+        if self._client is None:
+            try:
+                import anthropic
+            except ImportError:
+                logger.error("anthropic package is not installed")
+                raise
+            # Check for API key in settings or environment
+            api_key = settings.anthropic_api_key or os.getenv("ANTHROPIC_API_KEY")
+            if not api_key:
+                raise ValueError("No Anthropic API key configured")
+            self._client = anthropic.Anthropic(api_key=api_key)
+        return self._client
+    def is_available(self) -> bool:
+        """Check if Claude Vision is available.
+        Returns:
+            True when an API key is configured (settings or ANTHROPIC_API_KEY)
+            and settings.use_claude_vision is enabled; False otherwise,
+            including when reading the configuration raises.
+        """
+        try:
+            api_key = settings.anthropic_api_key or os.getenv("ANTHROPIC_API_KEY")
+            return bool(api_key) and settings.use_claude_vision
+        except Exception:
+            return False
+    async def analyze_label(
+        self,
+        image_data: bytes,
+        media_type: str = "image/jpeg"
+    ) -> dict[str, Any]:
+        """Analyze a wine label image using Claude Vision.
+        Args:
+            image_data: Raw image data as bytes.
+            media_type: MIME type of the image (image/jpeg, image/png, etc.)
+        Returns:
+            Dictionary with the wine fields and "raw_text"; on any failure,
+            the all-empty dictionary from _empty_result().
+        """
+        fields = await self._request_fields(
+            [(None, image_data, media_type)],
+            WINE_ANALYSIS_PROMPT,
+        )
+        return fields if fields is not None else self._empty_result()
+    async def analyze_labels(
+        self,
+        front_image_data: bytes,
+        back_image_data: bytes | None = None,
+        front_media_type: str = "image/jpeg",
+        back_media_type: str = "image/jpeg",
+    ) -> dict[str, Any]:
+        """Analyze front and back wine label images.
+        Args:
+            front_image_data: Front label image data.
+            back_image_data: Optional back label image data.
+            front_media_type: MIME type of front image.
+            back_media_type: MIME type of back image.
+        Returns:
+            Combined analysis results: the wine fields, "raw_text",
+            "front_label_text" (same value as "raw_text") and
+            "back_label_text" (always None; back-label text is part of
+            "raw_text"). On any failure, the result of _empty_result().
+        """
+        if back_image_data:
+            # With two images, each one is followed by a caption so the model
+            # can tell them apart, and the prompt is reworded for plural input.
+            labels = [
+                ("Front label:", front_image_data, front_media_type),
+                ("Back label:", back_image_data, back_media_type),
+            ]
+            prompt = WINE_ANALYSIS_PROMPT.replace(
+                "this wine label image",
+                "these wine label images (front and back)"
+            )
+        else:
+            labels = [(None, front_image_data, front_media_type)]
+            prompt = WINE_ANALYSIS_PROMPT
+        fields = await self._request_fields(labels, prompt)
+        if fields is None:
+            return self._empty_result()
+        return {
+            **fields,
+            "front_label_text": fields["raw_text"],
+            "back_label_text": None,  # Combined in raw_text
+        }
+    async def _request_fields(
+        self,
+        labels: list[tuple[str | None, bytes, str]],
+        prompt: str,
+    ) -> dict[str, Any] | None:
+        """Send label images to Claude and return the extracted fields.
+        Args:
+            labels: (caption, image bytes, MIME type) per image; a caption of
+                None means no caption text block is added after that image.
+            prompt: Instruction text appended after all images.
+        Returns:
+            Dict with the wine fields plus "raw_text", or None when the
+            request or the parsing of the reply failed (the error is logged).
+        """
+        try:
+            content: list[dict[str, Any]] = []
+            for caption, image_data, media_type in labels:
+                content.append({
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": media_type,
+                        "data": base64.standard_b64encode(image_data).decode("utf-8"),
+                    },
+                })
+                if caption is not None:
+                    content.append({"type": "text", "text": caption})
+            content.append({"type": "text", "text": prompt})
+            # messages.create is a blocking call on the synchronous client; run
+            # it in a worker thread so the event loop keeps serving requests.
+            message = await asyncio.to_thread(
+                self.client.messages.create,
+                model=self.MODEL,
+                max_tokens=self.MAX_TOKENS,
+                messages=[{"role": "user", "content": content}],
+            )
+            response_text = message.content[0].text
+        except Exception as e:
+            logger.error(f"Claude Vision analysis failed: {e}")
+            return None
+        try:
+            result = self._parse_response(response_text)
+        except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
+            logger.error(f"Failed to parse Claude response as JSON: {e}")
+            logger.debug(f"Response was: {response_text}")
+            return None
+        # Ensure all expected fields exist
+        fields = {field: result.get(field) for field in self._WINE_FIELDS}
+        fields["raw_text"] = result.get("raw_text", "")
+        return fields
+    @staticmethod
+    def _parse_response(response_text: str) -> dict[str, Any]:
+        """Extract the JSON object from Claude's reply text.
+        Args:
+            response_text: Text of the first content block of the reply.
+        Returns:
+            The decoded JSON object.
+        Raises:
+            json.JSONDecodeError: If no parseable JSON is found.
+            ValueError: If the decoded JSON is not an object.
+        """
+        candidate = response_text
+        # Handle case where Claude might wrap JSON in markdown code blocks
+        if "```json" in candidate:
+            candidate = candidate.split("```json")[1].split("```")[0]
+        elif "```" in candidate:
+            candidate = candidate.split("```")[1].split("```")[0]
+        candidate = candidate.strip()
+        try:
+            parsed = json.loads(candidate)
+        except json.JSONDecodeError:
+            # Tolerate prose or a fence language tag around the object by
+            # retrying on the outermost {...} span.
+            start, end = candidate.find("{"), candidate.rfind("}")
+            if start == -1 or end < start:
+                raise
+            parsed = json.loads(candidate[start:end + 1])
+        if not isinstance(parsed, dict):
+            raise ValueError("Claude response is not a JSON object")
+        return parsed
+    def _empty_result(self) -> dict[str, Any]:
+        """Return an empty result dictionary (all fields None or empty)."""
+        return {
+            "name": None,
+            "winery": None,
+            "vintage": None,
+            "grape_variety": None,
+            "region": None,
+            "country": None,
+            "alcohol_percentage": None,
+            "raw_text": "",
+            "front_label_text": "",
+            "back_label_text": None,
+        }

winebox 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

winebox 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl