taster 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. sommelier/__init__.py +2 -0
  2. sommelier/api/__init__.py +4 -0
  3. sommelier/api/app.py +44 -0
  4. sommelier/api/models.py +194 -0
  5. sommelier/api/routers/__init__.py +1 -0
  6. sommelier/api/routers/classify.py +55 -0
  7. sommelier/api/routers/profiles.py +51 -0
  8. sommelier/api/routers/results.py +66 -0
  9. sommelier/api/routers/training.py +43 -0
  10. sommelier/api/services/__init__.py +0 -0
  11. sommelier/api/services/classification_service.py +263 -0
  12. sommelier/api/services/profile_service.py +161 -0
  13. sommelier/api/services/training_service.py +217 -0
  14. sommelier/classification/__init__.py +10 -0
  15. sommelier/classification/classifier.py +708 -0
  16. sommelier/classification/prompt_builder.py +504 -0
  17. sommelier/classification/routing.py +337 -0
  18. sommelier/cli.py +485 -0
  19. sommelier/compat.py +23 -0
  20. sommelier/core/__init__.py +48 -0
  21. sommelier/core/ai_client.py +97 -0
  22. sommelier/core/cache.py +444 -0
  23. sommelier/core/config.py +347 -0
  24. sommelier/core/file_utils.py +374 -0
  25. sommelier/core/logging_config.py +173 -0
  26. sommelier/core/media_prep.py +149 -0
  27. sommelier/core/models.py +537 -0
  28. sommelier/core/profiles.py +422 -0
  29. sommelier/core/provider_factory.py +109 -0
  30. sommelier/core/providers/__init__.py +6 -0
  31. sommelier/core/providers/anthropic_provider.py +213 -0
  32. sommelier/core/providers/gemini.py +5 -0
  33. sommelier/core/providers/openai_provider.py +204 -0
  34. sommelier/dirs.py +79 -0
  35. sommelier/features/__init__.py +15 -0
  36. sommelier/features/burst_detector.py +333 -0
  37. sommelier/features/document_features.py +410 -0
  38. sommelier/features/embeddings.py +322 -0
  39. sommelier/features/quality.py +404 -0
  40. sommelier/mcp/__init__.py +4 -0
  41. sommelier/mcp/server.py +1403 -0
  42. sommelier/pipelines/__init__.py +13 -0
  43. sommelier/pipelines/base.py +224 -0
  44. sommelier/pipelines/document_pipeline.py +134 -0
  45. sommelier/pipelines/mixed_pipeline.py +88 -0
  46. sommelier/pipelines/photo_pipeline.py +210 -0
  47. sommelier/training/__init__.py +13 -0
  48. sommelier/training/sampler.py +220 -0
  49. sommelier/training/session.py +265 -0
  50. sommelier/training/synthesizer.py +555 -0
  51. taster-3.1.0.dist-info/METADATA +549 -0
  52. taster-3.1.0.dist-info/RECORD +54 -0
  53. taster-3.1.0.dist-info/WHEEL +4 -0
  54. taster-3.1.0.dist-info/entry_points.txt +2 -0
sommelier/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ """Sommelier - Teach AI your taste. Apply it to everything."""
2
+ __version__ = "3.1.0"
sommelier/api/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """Sommelier REST API."""
2
+ from .app import create_app
3
+
4
+ __all__ = ["create_app"]
sommelier/api/app.py ADDED
@@ -0,0 +1,44 @@
1
+ """FastAPI application factory for the Sommelier API."""
2
+ from fastapi import FastAPI
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+
5
+ from sommelier import __version__
6
+
7
+
8
def create_app() -> FastAPI:
    """Build and return the Sommelier FastAPI application.

    The factory installs CORS middleware, mounts the profiles, classify,
    results and training routers (in that order), and registers a ``/``
    endpoint that reports the API name, version and docs path.

    Returns:
        The configured ``FastAPI`` instance.
    """
    api = FastAPI(
        title="Sommelier API",
        description="Universal AI-powered media classification platform",
        version=__version__,
    )

    # CORS settings for frontend access.
    # NOTE(review): wildcard origins combined with allow_credentials=True is
    # a combination the FastAPI CORS docs advise against; confirm whether any
    # client actually sends credentials before tightening this.
    cors_options = {
        "allow_origins": ["*"],
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    api.add_middleware(CORSMiddleware, **cors_options)

    # Routers are imported inside the factory, in the same order they are
    # mounted, so their modules are only loaded when an app is created.
    from sommelier.api.routers.profiles import router as profiles_router
    from sommelier.api.routers.classify import router as classify_router
    from sommelier.api.routers.results import router as results_router
    from sommelier.api.routers.training import router as training_router

    for mounted in (profiles_router, classify_router, results_router, training_router):
        api.include_router(mounted)

    @api.get("/")
    async def root():
        info = {
            "name": "Sommelier API",
            "version": __version__,
            "docs": "/docs",
        }
        return info

    return api
sommelier/api/models.py ADDED
@@ -0,0 +1,194 @@
1
+ """Pydantic models for the API layer."""
2
+ from datetime import datetime
3
+ from enum import Enum
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
+ # ── Profile models ──────────────────────────────────────────────────────
10
+
11
class CategoryDefinitionModel(BaseModel):
    """Schema for one output category: a name, a description and an optional color."""

    name: str
    description: str
    # Optional; omitted colors are stored as None.
    color: Optional[str] = None
16
+
17
+
18
class PhotoProfileSettingsModel(BaseModel):
    """Boolean switches for photo/video profiles; every switch defaults to True."""

    enable_burst_detection: bool = True
    enable_face_detection: bool = True
    contains_children_check: bool = True
    appropriateness_check: bool = True
24
+
25
+
26
class DocumentProfileSettingsModel(BaseModel):
    """Settings that apply to document profiles.

    All fields carry defaults, so an empty payload yields a fully populated
    settings object.
    """

    # Feature switches (all on by default).
    extract_text: bool = True
    extract_metadata: bool = True
    enable_similarity_grouping: bool = True
    # Numeric knobs.
    similarity_threshold: float = 0.85
    max_pages_to_analyze: int = 10
33
+
34
+
35
class ProfileCreate(BaseModel):
    """Payload accepted when a new profile is created.

    Only ``name`` is required (1-128 characters); every other field has a
    default.
    """

    name: str = Field(..., min_length=1, max_length=128)
    description: str = ""
    # A fresh ["image"] list is built for each instance.
    media_types: List[str] = Field(default_factory=lambda: ["image"])
    categories: List[CategoryDefinitionModel] = Field(default_factory=list)
    default_category: str = "Review"
    top_priorities: List[str] = Field(default_factory=list)
    positive_criteria: Dict[str, List[str]] = Field(default_factory=dict)
    negative_criteria: Dict[str, List[str]] = Field(default_factory=dict)
    specific_guidance: List[str] = Field(default_factory=list)
    philosophy: str = ""
    thresholds: Dict[str, float] = Field(default_factory=dict)
    # Media-specific settings are optional and default to None.
    photo_settings: Optional[PhotoProfileSettingsModel] = None
    document_settings: Optional[DocumentProfileSettingsModel] = None
50
+
51
+
52
class ProfileUpdate(BaseModel):
    """Payload accepted when an existing profile is updated.

    Every field is optional and defaults to None. The model has no ``name``
    field.
    """

    description: Optional[str] = None
    media_types: Optional[List[str]] = None
    categories: Optional[List[CategoryDefinitionModel]] = None
    default_category: Optional[str] = None
    top_priorities: Optional[List[str]] = None
    positive_criteria: Optional[Dict[str, List[str]]] = None
    negative_criteria: Optional[Dict[str, List[str]]] = None
    specific_guidance: Optional[List[str]] = None
    philosophy: Optional[str] = None
    thresholds: Optional[Dict[str, float]] = None
    photo_settings: Optional[PhotoProfileSettingsModel] = None
    document_settings: Optional[DocumentProfileSettingsModel] = None
66
+
67
+
68
class ProfileSummary(BaseModel):
    """Condensed profile representation used by the profile listing endpoint.

    Categories are plain strings here rather than full category models.
    """

    name: str
    description: str
    media_types: List[str]
    categories: List[str]
    # Timestamps are carried as strings.
    created_at: str
    updated_at: str
    version: int
77
+
78
+
79
class ProfileDetail(BaseModel):
    """Complete profile representation.

    All fields are required except the two media-specific settings blocks,
    which default to None.
    """

    name: str
    description: str
    media_types: List[str]
    categories: List[CategoryDefinitionModel]
    default_category: str
    top_priorities: List[str]
    positive_criteria: Dict[str, List[str]]
    negative_criteria: Dict[str, List[str]]
    specific_guidance: List[str]
    philosophy: str
    thresholds: Dict[str, float]
    photo_settings: Optional[PhotoProfileSettingsModel] = None
    document_settings: Optional[DocumentProfileSettingsModel] = None
    # Timestamps are carried as strings.
    created_at: str
    updated_at: str
    version: int
97
+
98
+
99
+ # ── Classification / job models ─────────────────────────────────────────
100
+
101
class JobStatus(str, Enum):
    """Lifecycle states of a classification job.

    Members are ``str`` subclasses, so they compare equal to their lowercase
    string values.
    """

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
107
+
108
+
109
class ClassifyFolderRequest(BaseModel):
    """Payload for requesting classification of a local folder.

    ``folder_path`` is required; the profile defaults to ``"default-photos"``
    and ``dry_run`` defaults to False.
    """

    folder_path: str
    profile_name: str = "default-photos"
    dry_run: bool = False
114
+
115
+
116
class JobStatusResponse(BaseModel):
    """Status snapshot of a classification job.

    ``job_id``, ``status``, ``profile_name`` and ``created_at`` are required;
    the remaining fields have defaults.
    """

    job_id: str
    status: JobStatus
    profile_name: str
    folder_path: Optional[str] = None
    # Progress counters start at zero.
    progress: float = 0.0
    total_files: int = 0
    processed_files: int = 0
    # Timestamps are carried as strings.
    created_at: str
    completed_at: Optional[str] = None
    error: Optional[str] = None
128
+
129
+
130
class ClassificationResultItem(BaseModel):
    """Classification outcome for one file.

    Path, name and category are required; confidence and reasoning are
    optional, and ``burst_size`` defaults to 1.
    """

    file_path: str
    file_name: str
    category: str
    confidence: Optional[float] = None
    reasoning: Optional[str] = None
    burst_size: int = 1
138
+
139
+
140
class JobResultsResponse(BaseModel):
    """Results envelope for a classification job.

    ``stats`` and ``results`` default to an empty dict and an empty list.
    """

    job_id: str
    status: JobStatus
    profile_name: str
    stats: Dict[str, int] = Field(default_factory=dict)
    results: List[ClassificationResultItem] = Field(default_factory=list)
147
+
148
+
149
+ # ── Results / export models ──────────────────────────────────────────────
150
+
151
class FileInCategory(BaseModel):
    """One file entry inside a category listing.

    Path and name are required; confidence and reasoning default to None.
    """

    file_path: str
    file_name: str
    confidence: Optional[float] = None
    reasoning: Optional[str] = None
157
+
158
+
159
class CategoryFiles(BaseModel):
    """A category name together with a count and its list of file entries."""

    category: str
    count: int
    files: List[FileInCategory]
164
+
165
+
166
+ # ── Training / feedback models ───────────────────────────────────────────
167
+
168
class FeedbackItem(BaseModel):
    """A user's correction for one classified file.

    Carries the file path, the original and corrected categories, and
    optional free-text notes.
    """

    file_path: str
    original_category: str
    corrected_category: str
    notes: Optional[str] = None
174
+
175
+
176
class FeedbackRequest(BaseModel):
    """Payload for submitting a batch of feedback items.

    ``feedback`` is required; ``job_id`` is optional and ``profile_name``
    defaults to ``"default-photos"``.
    """

    job_id: Optional[str] = None
    profile_name: str = "default-photos"
    feedback: List[FeedbackItem]
181
+
182
+
183
class GenerateProfileRequest(BaseModel):
    """Payload describing a profile-generation request.

    ``profile_name`` is required and must be non-empty; the base profile and
    examples folder are optional.
    """

    profile_name: str = Field(..., min_length=1)
    base_profile: Optional[str] = None
    examples_folder: Optional[str] = None
188
+
189
+
190
class TrainingStats(BaseModel):
    """Aggregate numbers describing collected training data.

    Every field has a default: zero, an empty list and an empty dict.
    """

    total_feedback_items: int = 0
    profiles_with_feedback: List[str] = Field(default_factory=list)
    corrections_by_category: Dict[str, int] = Field(default_factory=dict)
sommelier/api/routers/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """API routers."""
sommelier/api/routers/classify.py ADDED
@@ -0,0 +1,55 @@
1
+ """Classification endpoints."""
2
+ from fastapi import APIRouter, HTTPException
3
+
4
+ from sommelier.api.models import ClassifyFolderRequest, JobStatusResponse
5
+ from sommelier.api.services.classification_service import ClassificationService
6
+ from sommelier.core.config import load_config
7
+
8
+ router = APIRouter(prefix="/api/classify", tags=["classify"])
9
+
10
+ _service = None
11
+
12
+
13
def _get_service() -> ClassificationService:
    """Return this module's ClassificationService, building it on first use.

    The instance is created from ``load_config()`` and stored in the
    module-level ``_service`` so later calls reuse it.
    """
    global _service
    if _service is not None:
        return _service
    _service = ClassificationService(load_config())
    return _service
19
+
20
+
21
@router.post("/folder")
async def classify_folder(request: ClassifyFolderRequest):
    """Start a classification job for a local folder.

    Returns:
        A dict with the new ``job_id`` and ``"status": "started"``.

    Raises:
        HTTPException: 404 on ``FileNotFoundError`` and 400 on ``ValueError``
            raised while obtaining the service or starting the job.
    """
    job_args = {
        "folder_path": request.folder_path,
        "profile_name": request.profile_name,
        "dry_run": request.dry_run,
    }
    try:
        # Service lookup stays inside the try so its errors are mapped too.
        job_id = _get_service().start_job(**job_args)
    except FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return {"job_id": job_id, "status": "started"}
36
+
37
+
38
@router.get("/{job_id}")
async def get_job_status(job_id: str):
    """Return the service's status for ``job_id``.

    Raises:
        HTTPException: 404 when the service returns None for the job.
    """
    job_status = _get_service().get_job_status(job_id)
    if job_status is not None:
        return job_status
    raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
46
+
47
+
48
@router.get("/{job_id}/results")
async def get_job_results(job_id: str):
    """Return the service's results for ``job_id``.

    Raises:
        HTTPException: 404 when the service returns None for the job.
    """
    job_results = _get_service().get_job_results(job_id)
    if job_results is not None:
        return job_results
    raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
sommelier/api/routers/profiles.py ADDED
@@ -0,0 +1,51 @@
1
+ """Profile CRUD endpoints."""
2
+ from fastapi import APIRouter, HTTPException
3
+
4
+ from sommelier.api.models import ProfileCreate, ProfileUpdate, ProfileSummary, ProfileDetail
5
+ from sommelier.api.services.profile_service import ProfileService
6
+
7
+ router = APIRouter(prefix="/api/profiles", tags=["profiles"])
8
+
9
+ _service = ProfileService()
10
+
11
+
12
@router.get("/", response_model=list[ProfileSummary])
async def list_profiles():
    """Return the profile service's listing, serialized as profile summaries."""
    summaries = _service.list_profiles()
    return summaries
16
+
17
+
18
@router.get("/{name}")
async def get_profile(name: str):
    """Return the profile called ``name``.

    Raises:
        HTTPException: 404 when the service raises ``FileNotFoundError``.
    """
    try:
        profile = _service.get_profile(name)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Profile '{name}' not found")
    return profile
25
+
26
+
27
@router.post("/", status_code=201)
async def create_profile(data: ProfileCreate):
    """Create a profile from the request body and return the service's result.

    Raises:
        HTTPException: 400 carrying the message of any ``ValueError`` raised
            while dumping the payload or creating the profile.
    """
    try:
        created = _service.create_profile(data.model_dump())
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return created
34
+
35
+
36
@router.put("/{name}")
async def update_profile(name: str, data: ProfileUpdate):
    """Apply the non-None fields of ``data`` to the profile called ``name``.

    Top-level fields whose dumped value is None are left out of the update
    passed to the service.

    Raises:
        HTTPException: 404 when ``FileNotFoundError`` is raised.
    """
    try:
        updates = {}
        for field_name, value in data.model_dump().items():
            if value is not None:
                updates[field_name] = value
        updated = _service.update_profile(name, updates)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Profile '{name}' not found")
    return updated
44
+
45
+
46
@router.delete("/{name}")
async def delete_profile(name: str):
    """Delete the profile called ``name``.

    Returns:
        A dict with ``"status": "deleted"`` and the profile name.

    Raises:
        HTTPException: 404 when the service reports a falsy result.
    """
    if not _service.delete_profile(name):
        raise HTTPException(status_code=404, detail=f"Profile '{name}' not found")
    return {"status": "deleted", "name": name}
sommelier/api/routers/results.py ADDED
@@ -0,0 +1,66 @@
1
+ """Results and export endpoints."""
2
+ import csv
3
+ import io
4
+ from fastapi import APIRouter, HTTPException
5
+ from fastapi.responses import StreamingResponse
6
+
7
+ from sommelier.api.services.classification_service import ClassificationService
8
+ from sommelier.core.config import load_config
9
+
10
+ router = APIRouter(prefix="/api/results", tags=["results"])
11
+
12
+ _service = None
13
+
14
+
15
def _get_service() -> ClassificationService:
    """Return this module's ClassificationService, building it on first use.

    The instance is created from ``load_config()`` and stored in the
    module-level ``_service`` so later calls reuse it.
    """
    # NOTE(review): this is a separate instance from the one the classify
    # router creates; if the service keeps job state in memory, jobs started
    # there may not be visible here - confirm against ClassificationService.
    global _service
    if _service is not None:
        return _service
    _service = ClassificationService(load_config())
    return _service
21
+
22
+
23
@router.get("/{job_id}")
async def get_results(job_id: str):
    """Return the service's results for ``job_id``.

    Raises:
        HTTPException: 404 when the service returns None for the job.
    """
    job_results = _get_service().get_job_results(job_id)
    if job_results is not None:
        return job_results
    raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")
31
+
32
+
33
@router.get("/{job_id}/export")
async def export_results(job_id: str, format: str = "csv"):
    """Export a job's classification results.

    With ``format="csv"`` (the default) the results are streamed back as a
    CSV attachment named ``results_<job_id>.csv``. Any other ``format`` value
    returns the results unchanged.

    The CSV header is the ordered union of the keys of all rows. Rows with
    keys the first row lacks therefore no longer make ``csv.DictWriter``
    raise ``ValueError``; keys a row does not have are written as empty
    cells. An empty result set produces an empty CSV body.

    Raises:
        HTTPException: 404 when the service returns None for the job.
    """
    # ``format`` shadows the builtin, but it is the public query-parameter
    # name and so cannot be renamed without changing the API.
    results = _get_service().get_job_results(job_id)
    if results is None:
        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")

    if format != "csv":
        return results

    buffer = io.StringIO()
    if results:
        # dict.fromkeys de-duplicates while keeping first-seen column order.
        fieldnames = list(dict.fromkeys(key for row in results for key in row))
        writer = csv.DictWriter(buffer, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)

    return StreamingResponse(
        iter([buffer.getvalue()]),
        media_type="text/csv",
        headers={"Content-Disposition": f"attachment; filename=results_{job_id}.csv"},
    )
55
+
56
+
57
@router.get("/{job_id}/files/{category}")
async def list_files_in_category(job_id: str, category: str):
    """Return the job's result entries whose ``"destination"`` equals ``category``.

    Returns:
        A dict with the category, the number of matching entries and the
        entries themselves.

    Raises:
        HTTPException: 404 when the service returns None for the job.
    """
    job_results = _get_service().get_job_results(job_id)
    if job_results is None:
        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found")

    matching = []
    for entry in job_results:
        if entry.get("destination") == category:
            matching.append(entry)
    return {"category": category, "count": len(matching), "files": matching}
sommelier/api/routers/training.py ADDED
@@ -0,0 +1,43 @@
1
+ """Training and feedback endpoints."""
2
+ from fastapi import APIRouter, HTTPException
3
+
4
+ from sommelier.api.models import FeedbackRequest, TrainingStats
5
+ from sommelier.api.services.training_service import TrainingService
6
+
7
+ router = APIRouter(prefix="/api/training", tags=["training"])
8
+
9
+ _service = TrainingService()
10
+
11
+
12
@router.post("/feedback")
async def submit_feedback(request: FeedbackRequest):
    """Forward each feedback item in the request to the training service.

    For every item, the file path, the corrected category and the notes (or
    an empty string when notes are missing) are passed on.

    Returns:
        A dict with ``"status": "received"`` and the number of items handled.
    """
    outcomes = [
        _service.submit_feedback(
            file_path=item.file_path,
            correct_category=item.corrected_category,
            reasoning=item.notes or "",
        )
        for item in request.feedback
    ]
    return {"status": "received", "count": len(outcomes)}
24
+
25
+
26
@router.post("/generate-profile")
async def generate_profile(profile_name: str):
    """Ask the training service to generate a profile from feedback.

    Returns:
        A dict with ``"status": "created"`` and the generated profile.

    Raises:
        HTTPException: 400 carrying the message of any ``ValueError`` raised
            by the service.
    """
    try:
        generated = _service.generate_profile_from_feedback(profile_name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    return {"status": "created", "profile": generated}
34
+
35
+
36
@router.get("/stats", response_model=TrainingStats)
async def get_stats():
    """Return training statistics built from the service's stats mapping.

    Reads ``"total_feedback"`` (default 0) and ``"by_category"`` (default
    empty dict) from the mapping; other ``TrainingStats`` fields keep their
    defaults.
    """
    raw = _service.get_stats()
    total = raw.get("total_feedback", 0)
    per_category = raw.get("by_category", {})
    return TrainingStats(
        total_feedback_items=total,
        corrections_by_category=per_category,
    )
sommelier/api/services/__init__.py ADDED
File without changes