vocal-api 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Virtual Environment
25
+ .venv/
26
+ venv/
27
+ ENV/
28
+ env/
29
+
30
+ # UV
31
+ uv.lock
32
+
33
+ # IDE
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+ *.swo
38
+ *~
39
+ .DS_Store
40
+
41
+ # Testing
42
+ .pytest_cache/
43
+ .coverage
44
+ htmlcov/
45
+ .tox/
46
+
47
+ # Jupyter
48
+ .ipynb_checkpoints
49
+
50
+ # Model cache
51
+ .cache/
52
+ models/
53
+ *.ckpt
54
+ *.pth
55
+ *.pt
56
+ *.safetensors
57
+
58
+ # Audio files (test data)
59
+ *.mp3
60
+ *.wav
61
+ *.m4a
62
+ *.ogg
63
+ *.flac
64
+
65
+ # Logs
66
+ *.log
67
+ logs/
68
+
69
+ # Environment variables
70
+ .env
71
+ .env.local
72
+
73
+ # OS
74
+ Thumbs.db
@@ -0,0 +1,3 @@
1
+ include ../../../README.md
2
+ include ../../../LICENSE
3
+ recursive-include vocal_api *.py
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.4
2
+ Name: vocal-api
3
+ Version: 0.3.0
4
+ Summary: FastAPI server for Vocal - Ollama-style Voice Model Management
5
+ Project-URL: Homepage, https://github.com/niradler/vocal
6
+ Project-URL: Documentation, https://github.com/niradler/vocal#readme
7
+ Project-URL: Repository, https://github.com/niradler/vocal
8
+ Project-URL: Issues, https://github.com/niradler/vocal/issues
9
+ Author: Vocal Contributors
10
+ License: SSPL-1.0
11
+ Keywords: api,fastapi,openai-compatible,speech-to-text,stt,tts
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Framework :: FastAPI
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: Other/Proprietary License
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
20
+ Requires-Python: >=3.11
21
+ Requires-Dist: aiofiles>=25.1.0
22
+ Requires-Dist: fastapi>=0.109.0
23
+ Requires-Dist: pydantic>=2.5.0
24
+ Requires-Dist: python-multipart>=0.0.6
25
+ Requires-Dist: uvicorn[standard]>=0.27.0
26
+ Requires-Dist: vocal-core>=0.3.0
@@ -0,0 +1,38 @@
1
+ [project]
2
+ name = "vocal-api"
3
+ version = "0.3.0"
4
+ description = "FastAPI server for Vocal - Ollama-style Voice Model Management"
5
+ requires-python = ">=3.11"
6
+ license = { text = "SSPL-1.0" }
7
+ authors = [
8
+ { name = "Vocal Contributors" }
9
+ ]
10
+ keywords = ["fastapi", "api", "speech-to-text", "tts", "stt", "openai-compatible"]
11
+ classifiers = [
12
+ "Development Status :: 3 - Alpha",
13
+ "Intended Audience :: Developers",
14
+ "License :: Other/Proprietary License",
15
+ "Programming Language :: Python :: 3.11",
16
+ "Programming Language :: Python :: 3.12",
17
+ "Programming Language :: Python :: 3.13",
18
+ "Framework :: FastAPI",
19
+ "Topic :: Internet :: WWW/HTTP :: HTTP Servers",
20
+ ]
21
+ dependencies = [
22
+ "fastapi>=0.109.0",
23
+ "uvicorn[standard]>=0.27.0",
24
+ "pydantic>=2.5.0",
25
+ "python-multipart>=0.0.6",
26
+ "vocal-core>=0.3.0",
27
+ "aiofiles>=25.1.0",
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/niradler/vocal"
32
+ Documentation = "https://github.com/niradler/vocal#readme"
33
+ Repository = "https://github.com/niradler/vocal"
34
+ Issues = "https://github.com/niradler/vocal/issues"
35
+
36
+ [build-system]
37
+ requires = ["hatchling"]
38
+ build-backend = "hatchling.build"
@@ -0,0 +1,9 @@
1
# Package entry point: re-export the FastAPI application and its settings.
from .config import settings
from .main import app

# The package version is sourced from runtime settings so that __version__
# and the /health endpoint always agree.
__version__ = settings.VERSION

# Public API of the package.
__all__ = [
    "app",
    "settings",
]
@@ -0,0 +1,21 @@
1
+ from pydantic_settings import BaseSettings
2
+
3
+
4
class Settings(BaseSettings):
    """Application settings.

    Values can be overridden via environment variables (case-sensitive)
    or a local ``.env`` file, courtesy of ``pydantic_settings.BaseSettings``.
    """

    APP_NAME: str = "Vocal API"
    # Keep in sync with the package version in pyproject.toml / PKG-INFO.
    # Was "0.1.0", which made __version__ and the health endpoints report a
    # stale version for the 0.3.0 release.
    VERSION: str = "0.3.0"
    DEBUG: bool = True

    # Origins allowed by the CORS middleware; "*" permits any origin.
    CORS_ORIGINS: list[str] = ["*"]

    # Maximum accepted upload size in bytes (25 MiB).
    MAX_UPLOAD_SIZE: int = 25 * 1024 * 1024

    model_config = {
        "env_file": ".env",
        "case_sensitive": True,
    }


# Module-level singleton used throughout the application.
settings = Settings()
@@ -0,0 +1,43 @@
1
+ from vocal_core import ModelRegistry
2
+
3
+ from .services import ModelService, TranscriptionService, TTSService
4
+
5
# Lazily-constructed process-wide singletons.
_registry: ModelRegistry | None = None
_transcription_service: TranscriptionService | None = None
_model_service: ModelService | None = None
_tts_service: TTSService | None = None


def get_registry() -> ModelRegistry:
    """Return the shared ModelRegistry, constructing it on first use."""
    global _registry
    if _registry is None:
        _registry = ModelRegistry()
    return _registry


def get_transcription_service() -> TranscriptionService:
    """Return the shared TranscriptionService, constructing it on first use."""
    global _transcription_service
    if _transcription_service is None:
        # 300s keep-alive: idle STT models are unloaded after five minutes.
        _transcription_service = TranscriptionService(get_registry(), keep_alive_seconds=300)
    return _transcription_service


def get_model_service() -> ModelService:
    """Return the shared ModelService, constructing it on first use."""
    global _model_service
    if _model_service is None:
        _model_service = ModelService(get_registry())
    return _model_service


def get_tts_service() -> TTSService:
    """Return the shared TTSService, constructing it on first use."""
    global _tts_service
    if _tts_service is None:
        # 300s keep-alive: idle TTS models are unloaded after five minutes.
        _tts_service = TTSService(get_registry(), keep_alive_seconds=300)
    return _tts_service
@@ -0,0 +1,75 @@
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.openapi.utils import get_openapi
4
+
5
+ from .config import settings
6
+ from .dependencies import get_transcription_service, get_tts_service
7
+ from .routes import models_router, system_router, transcription_router, tts_router
8
+
9
# Application wiring: create the FastAPI app, register middleware and
# routers, and install a cached custom OpenAPI schema builder.
app = FastAPI(
    title="Vocal API",
    description="Generic Speech AI Platform (STT + TTS)",
    version=settings.VERSION,
    docs_url="/docs",
    redoc_url="/redoc",
)

# Allow cross-origin requests from the configured origins (defaults to "*").
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(transcription_router)
app.include_router(models_router)
app.include_router(tts_router)
app.include_router(system_router)


# NOTE(review): @app.on_event is deprecated in recent FastAPI versions in
# favor of lifespan handlers; it still works, but worth migrating.
@app.on_event("startup")
async def startup_event():
    """Initialize services and start background tasks"""
    # Instantiating the services here also starts their keep-alive cleanup
    # loops so idle models are eventually unloaded.
    transcription_service = get_transcription_service()
    await transcription_service.start_cleanup_task()

    tts_service = get_tts_service()
    await tts_service.start_cleanup_task()


@app.get("/", tags=["health"])
async def root():
    """API health check"""
    return {
        "status": "ok",
        "message": "Vocal API - Ollama-style voice model management",
        "version": settings.VERSION,
    }


@app.get("/health", tags=["health"])
async def health():
    """Detailed health check"""
    return {
        "status": "healthy",
        "api_version": settings.VERSION,
    }


def custom_openapi():
    # Serve the cached schema after the first build.
    if app.openapi_schema:
        return app.openapi_schema

    openapi_schema = get_openapi(
        title="Vocal API",
        version=settings.VERSION,
        description="Generic Speech AI Platform (STT + TTS)",
        routes=app.routes,
    )

    app.openapi_schema = openapi_schema
    return app.openapi_schema


# Replace FastAPI's default schema generator with the cached builder above.
app.openapi = custom_openapi
@@ -0,0 +1,11 @@
1
from .models import router as models_router
from .system import router as system_router
from .transcription import router as transcription_router
from .tts import tts_router

# Routers re-exported for registration in main.app.
__all__ = [
    "transcription_router",
    "models_router",
    "tts_router",
    "system_router",
]
@@ -0,0 +1,122 @@
1
+ from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
2
+
3
+ from ..dependencies import get_model_service
4
+ from ..models.model import (
5
+ ModelDownloadProgress,
6
+ ModelInfo,
7
+ ModelListResponse,
8
+ )
9
+ from ..services import ModelService
10
+
11
router = APIRouter(prefix="/v1/models", tags=["models"])


@router.get(
    "",
    response_model=ModelListResponse,
    summary="List models",
    description="List all available models (Ollama-style)",
)
async def list_models(
    status: str | None = None,
    task: str | None = None,
    service: ModelService = Depends(get_model_service),
) -> ModelListResponse:
    """
    List all available models

    Query params:
    - status: Filter by status (available, downloading, not_downloaded)
    - task: Filter by task (stt, tts)
    """
    # Both filters are optional; the service applies whichever are given.
    matching = await service.list_models(status_filter=status, task=task)
    return ModelListResponse(models=matching, total=len(matching))
34
+
35
+
36
@router.get(
    "/{model_id:path}",
    response_model=ModelInfo,
    summary="Get model info",
    description="Get detailed information about a specific model",
)
async def get_model(model_id: str, service: ModelService = Depends(get_model_service)) -> ModelInfo:
    """Get detailed model information"""
    # The :path converter lets model ids contain slashes (e.g. "org/name").
    found = await service.get_model(model_id)
    if found is None:
        raise HTTPException(404, f"Model {model_id} not found")
    return found
48
+
49
+
50
@router.post(
    "/{model_id:path}/download",
    response_model=ModelDownloadProgress,
    summary="Download model",
    description="Download a model for local use (Ollama-style pull)",
)
async def download_model(
    model_id: str,
    background_tasks: BackgroundTasks,
    service: ModelService = Depends(get_model_service),
) -> ModelDownloadProgress:
    """
    Start downloading a model

    Returns immediately with initial status.
    Check progress with GET /models/{model_id}/download/status
    """
    if await service.get_model(model_id) is None:
        raise HTTPException(404, f"Model {model_id} not found")

    async def drain_download():
        # The service records progress internally; we only need to drive
        # the async generator to completion in the background.
        async for _ in service.download_model(model_id):
            pass

    background_tasks.add_task(drain_download)

    # Optimistic initial status; actual progress is polled separately.
    return ModelDownloadProgress(
        model_id=model_id,
        status="downloading",
        progress=0.0,
        downloaded_bytes=0,
        total_bytes=0,
        message="Starting download...",
    )
85
+
86
+
87
@router.get(
    "/{model_id:path}/download/status",
    response_model=ModelDownloadProgress,
    summary="Get download status",
)
async def get_download_status(model_id: str, service: ModelService = Depends(get_model_service)) -> ModelDownloadProgress:
    """Check model download progress"""
    progress = await service.get_download_status(model_id)
    if progress is not None:
        return progress

    # No active download: distinguish "unknown model", "already downloaded",
    # and "known but idle".
    model = await service.get_model(model_id)
    if not model:
        raise HTTPException(404, f"Model {model_id} not found")

    if model.status == "available":
        # Report a synthetic completed-download status.
        return ModelDownloadProgress(
            model_id=model_id,
            status="available",
            progress=1.0,
            downloaded_bytes=model.size,
            total_bytes=model.size,
            message="Model already downloaded",
        )

    raise HTTPException(404, "No active download for this model")
113
+
114
+
115
@router.delete("/{model_id:path}", summary="Delete model", description="Remove a downloaded model")
async def delete_model(model_id: str, service: ModelService = Depends(get_model_service)):
    """Delete a downloaded model"""
    # Service returns False both for unknown ids and for models that were
    # never downloaded; the API treats both as 404.
    if not await service.delete_model(model_id):
        raise HTTPException(404, f"Model {model_id} not found or not downloaded")
    return {"status": "deleted", "model_id": model_id}
@@ -0,0 +1,16 @@
1
+ from fastapi import APIRouter
2
+
3
+ from vocal_core.utils import get_device_info
4
+
5
router = APIRouter(prefix="/v1/system", tags=["system"])


@router.get("/device")
async def get_device():
    """
    Get device and hardware information

    Returns information about available compute devices (CPU, GPU)
    and optimization settings being used.
    """
    # Delegates entirely to vocal_core; no API-level processing.
    return get_device_info()
@@ -0,0 +1,81 @@
1
+ from typing import Annotated
2
+
3
+ from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
4
+
5
+ from ..config import settings
6
+ from ..dependencies import get_transcription_service
7
+ from ..models.transcription import (
8
+ TranscriptionFormat,
9
+ TranscriptionRequest,
10
+ TranscriptionResponse,
11
+ )
12
+ from ..services import TranscriptionService
13
+
14
router = APIRouter(prefix="/v1/audio", tags=["transcription"])


@router.post(
    "/transcriptions",
    response_model=TranscriptionResponse,
    summary="Transcribe audio",
    description="Transcribe audio file to text using specified model",
)
async def create_transcription(
    file: Annotated[UploadFile, File(description="Audio file to transcribe")],
    model: Annotated[str, Form(description="Model ID")] = "Systran/faster-whisper-tiny",
    language: Annotated[str | None, Form(description="Language code")] = None,
    prompt: Annotated[str | None, Form(description="Style prompt")] = None,
    response_format: Annotated[TranscriptionFormat, Form(description="Output format")] = TranscriptionFormat.JSON,
    temperature: Annotated[float, Form(ge=0.0, le=1.0)] = 0.0,
    service: TranscriptionService = Depends(get_transcription_service),
) -> TranscriptionResponse:
    """
    Transcribe an audio file.

    **Supported formats:** mp3, mp4, wav, m4a, flac, ogg, webm
    **Max file size:** 25MB

    Returns transcription with optional word/segment timestamps.
    """
    # Determine the upload size by seeking to the end of the spooled file,
    # then rewind so the service can read it from the start.
    file.file.seek(0, 2)
    size = file.file.tell()
    file.file.seek(0)

    if size > settings.MAX_UPLOAD_SIZE:
        raise HTTPException(400, f"File too large. Max {settings.MAX_UPLOAD_SIZE // (1024 * 1024)}MB.")

    request = TranscriptionRequest(
        model=model,
        language=language,
        prompt=prompt,
        response_format=response_format,
        temperature=temperature,
    )

    try:
        return await service.transcribe(file, request)
    except ValueError as e:
        # Client error (unknown model, model not downloaded, ...).
        # Chain the cause so the original traceback is preserved in logs.
        raise HTTPException(400, str(e)) from e
    except Exception as e:
        raise HTTPException(500, f"Transcription failed: {str(e)}") from e
62
+
63
+
64
@router.post(
    "/translations",
    response_model=TranscriptionResponse,
    summary="Translate audio to English",
    description="Translate audio to English text",
)
async def create_translation(
    file: Annotated[UploadFile, File()],
    model: Annotated[str, Form()] = "Systran/faster-whisper-tiny",
    service: TranscriptionService = Depends(get_transcription_service),
) -> TranscriptionResponse:
    """Translate audio to English."""
    try:
        return await service.translate(file, model)
    except ValueError as e:
        # Client error; chain the cause so it is not masked (PEP 3134).
        raise HTTPException(400, str(e)) from e
    except Exception as e:
        raise HTTPException(500, f"Translation failed: {str(e)}") from e
@@ -0,0 +1,98 @@
1
+ from fastapi import APIRouter, Depends, HTTPException, Response
2
+ from pydantic import BaseModel, Field
3
+
4
+ from ..dependencies import get_tts_service
5
+ from ..services.tts_service import TTSService
6
+
7
# Shares the /v1/audio prefix with the transcription router; the two are
# registered separately in main.app.
router = APIRouter(prefix="/v1/audio", tags=["audio"])


class TTSRequest(BaseModel):
    """Text-to-Speech request (OpenAI-compatible)"""

    model: str = Field(..., description="TTS model to use (e.g., 'hexgrad/Kokoro-82M')")
    input: str = Field(..., description="The text to synthesize")
    voice: str | None = Field(None, description="Voice ID to use")
    speed: float = Field(1.0, ge=0.25, le=4.0, description="Speech speed multiplier")
    response_format: str = Field("wav", description="Audio format (currently only 'wav' supported)")


class VoiceInfo(BaseModel):
    """Voice information"""

    id: str
    name: str
    language: str
    # Not every backend reports a gender, hence optional.
    gender: str | None = None


class VoicesResponse(BaseModel):
    """Response containing list of voices"""

    voices: list[VoiceInfo]
    total: int
34
+
35
+
36
@router.post(
    "/speech",
    response_class=Response,
    responses={200: {"content": {"audio/wav": {}}, "description": "Audio file in WAV format"}},
)
async def text_to_speech(request: TTSRequest, service: TTSService = Depends(get_tts_service)):
    """
    Generate speech from text (OpenAI-compatible endpoint)

    This endpoint synthesizes audio from text using the specified TTS model.

    - **model**: TTS model to use (e.g., 'hexgrad/Kokoro-82M', 'coqui/XTTS-v2')
    - **input**: The text to convert to speech
    - **voice**: Optional voice ID (use /v1/audio/voices to list available voices)
    - **speed**: Speech speed multiplier (0.25 to 4.0, default: 1.0)
    - **response_format**: Audio format (currently only 'wav' supported)
    """
    try:
        result = await service.synthesize(
            model_id=request.model,
            text=request.input,
            voice=request.voice,
            speed=request.speed,
            output_format=request.response_format,
        )

        # Return raw audio bytes; duration/sample-rate metadata travels in
        # custom headers so the body stays a plain audio payload.
        return Response(
            content=result.audio_data,
            media_type=f"audio/{result.format}",
            headers={
                "Content-Disposition": f"attachment; filename=speech.{result.format}",
                "X-Duration": str(result.duration),
                "X-Sample-Rate": str(result.sample_rate),
            },
        )

    except ValueError as e:
        # Client error (unknown/undownloaded model, bad parameters, ...).
        # Chain the cause so the original traceback is preserved in logs.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS error: {str(e)}") from e
76
+
77
+
78
@router.get("/voices", response_model=VoicesResponse)
async def list_voices(model: str | None = None, service: TTSService = Depends(get_tts_service)):
    """
    List available TTS voices

    Returns a list of all available voices that can be used for speech synthesis.

    - **model**: Optional model ID to list voices for a specific model
    """
    try:
        voices = await service.get_voices(model_id=model)
        voice_infos = [VoiceInfo(id=v.id, name=v.name, language=v.language, gender=v.gender) for v in voices]
        return VoicesResponse(voices=voice_infos, total=len(voice_infos))
    except ValueError as e:
        # Consistent with the other audio endpoints: registry lookups that
        # fail (unknown/undownloaded model) are client errors, not 500s.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}") from e


# Exported under a distinct name so routes/__init__ can re-export it.
tts_router = router
@@ -0,0 +1,9 @@
1
from .model_service import ModelService
from .transcription_service import TranscriptionService
from .tts_service import TTSService

# Service-layer public API.
__all__ = [
    "TranscriptionService",
    "ModelService",
    "TTSService",
]
@@ -0,0 +1,96 @@
1
+ from collections.abc import AsyncIterator
2
+
3
+ from vocal_core import ModelRegistry
4
+
5
+ from ..models.model import (
6
+ ModelDownloadProgress,
7
+ ModelInfo,
8
+ )
9
+
10
+
11
class ModelService:
    """Service for managing models.

    Thin layer over the core ModelRegistry that converts core model objects
    into API schema objects and tracks in-flight download progress.
    """

    def __init__(self, registry: ModelRegistry):
        self.registry = registry
        # In-memory progress per model_id; entries persist after completion
        # or failure so /download/status can report the last known state.
        self._download_status: dict[str, ModelDownloadProgress] = {}

    async def list_models(self, status_filter: str | None = None, task: str | None = None) -> list[ModelInfo]:
        """List all available models, optionally filtered by status/task."""
        models = await self.registry.list_models(task=task, status_filter=status_filter)

        return [self._convert_model_info(m) for m in models]

    async def get_model(self, model_id: str) -> ModelInfo | None:
        """Get model information, or None if the id is unknown."""
        model = await self.registry.get_model(model_id)
        if not model:
            return None
        return self._convert_model_info(model)

    async def download_model(self, model_id: str, quantization: str | None = None) -> AsyncIterator[ModelDownloadProgress]:
        """Download a model.

        Async generator: yields one ModelDownloadProgress per registry
        progress event, mirroring each update into ``_download_status`` so
        progress can be polled independently of this generator. On failure
        the error is recorded and yielded as a final "error" status rather
        than re-raised.
        """
        self._download_status[model_id] = ModelDownloadProgress(
            model_id=model_id,
            status="downloading",
            progress=0.0,
            downloaded_bytes=0,
            total_bytes=0,
            message="Starting download...",
        )

        try:
            async for downloaded, total, status in self.registry.download_model(model_id, quantization=quantization):
                # Guard against division by zero before the total is known.
                progress = (downloaded / total) if total > 0 else 0.0

                self._download_status[model_id] = ModelDownloadProgress(
                    model_id=model_id,
                    status=status.value,
                    progress=progress,
                    downloaded_bytes=downloaded,
                    total_bytes=total,
                    message=f"Downloaded {downloaded}/{total} bytes",
                )

                yield self._download_status[model_id]

        except Exception as e:
            # Record the failure as the final status; the generator ends
            # after yielding it (no re-raise).
            self._download_status[model_id] = ModelDownloadProgress(
                model_id=model_id,
                status="error",
                progress=0.0,
                downloaded_bytes=0,
                total_bytes=0,
                message=f"Download failed: {str(e)}",
            )
            yield self._download_status[model_id]

    async def get_download_status(self, model_id: str) -> ModelDownloadProgress | None:
        """Get download status for a model (None if never started)."""
        return self._download_status.get(model_id)

    async def delete_model(self, model_id: str) -> bool:
        """Delete a downloaded model; returns the registry's success flag."""
        return await self.registry.delete_model(model_id)

    def _convert_model_info(self, model) -> ModelInfo:
        """Convert core ModelInfo to API ModelInfo.

        Enum-valued fields (provider, backend, status, task) are flattened
        to their string values for the API schema.
        """
        return ModelInfo(
            id=model.id,
            name=model.name,
            provider=model.provider.value,
            description=model.description,
            size=model.size,
            size_readable=model.size_readable,
            parameters=model.parameters,
            languages=model.languages,
            backend=model.backend.value,
            status=model.status.value,
            source_url=model.source_url,
            license=model.license,
            recommended_vram=model.recommended_vram,
            task=model.task.value,
            local_path=model.local_path,
            created_at=model.created_at,
            updated_at=model.updated_at,
        )
@@ -0,0 +1,196 @@
1
+ import asyncio
2
+ import tempfile
3
+ import time
4
+ from pathlib import Path
5
+
6
+ import aiofiles
7
+ from fastapi import UploadFile
8
+
9
+ from vocal_core import FasterWhisperAdapter, ModelRegistry, TranscriptionResult
10
+
11
+ from ..models.transcription import (
12
+ TranscriptionRequest,
13
+ TranscriptionResponse,
14
+ TranscriptionSegment,
15
+ TranscriptionWord,
16
+ )
17
+
18
+
19
class TranscriptionService:
    """Service for handling transcription requests with Ollama-style keep-alive.

    Loaded adapters are cached per model id and unloaded by a background
    task once they have been idle longer than ``keep_alive_seconds``.
    """

    def __init__(self, registry: ModelRegistry, keep_alive_seconds: int = 300):
        self.registry = registry
        # Loaded adapters keyed by model_id.
        self.adapters: dict[str, FasterWhisperAdapter] = {}
        # model_id -> time.time() of last use, for keep-alive eviction.
        self.last_used: dict[str, float] = {}
        self.keep_alive_seconds = keep_alive_seconds
        self._cleanup_task = None

    async def start_cleanup_task(self):
        """Start background task to cleanup unused models (idempotent)."""
        if self._cleanup_task is None:
            self._cleanup_task = asyncio.create_task(self._cleanup_loop())

    async def _cleanup_loop(self):
        """Background task to unload models after keep_alive expires."""
        while True:
            try:
                await asyncio.sleep(60)
                now = time.time()
                expired = [
                    model_id
                    for model_id, last in self.last_used.items()
                    if now - last > self.keep_alive_seconds
                ]

                for model_id in expired:
                    if model_id in self.adapters:
                        await self.adapters[model_id].unload_model()
                        del self.adapters[model_id]
                        del self.last_used[model_id]

            except Exception:
                # Best-effort: a transient error must not kill the loop.
                # (asyncio.CancelledError is not caught here, so the task
                # can still be cancelled on shutdown.)
                pass

    async def transcribe(self, file: UploadFile, request: TranscriptionRequest) -> TranscriptionResponse:
        """
        Transcribe audio file

        Args:
            file: Uploaded audio file
            request: Transcription parameters

        Returns:
            TranscriptionResponse with text and metadata

        Raises:
            ValueError: If the model is unknown or not downloaded.
        """
        model_id = request.model

        model_info = await self.registry.get_model(model_id)
        if not model_info:
            raise ValueError(f"Model {model_id} not found in registry")

        model_path = self.registry.get_model_path(model_id)

        if not model_path:
            raise ValueError(f"Model {model_id} not downloaded. Download it first: POST /v1/models/{model_id}/download")

        adapter = await self._get_or_create_adapter(model_id, model_path)
        self.last_used[model_id] = time.time()

        temp_path = await self._save_upload_to_temp(file)
        try:
            # Word-level timestamps are only computed when requested.
            word_timestamps = "word" in request.timestamp_granularities

            result = await adapter.transcribe(
                audio=temp_path,
                language=request.language,
                task="transcribe",
                temperature=request.temperature,
                word_timestamps=word_timestamps,
            )
            return self._convert_result(result)
        finally:
            self._remove_temp(temp_path)

    async def translate(self, file: UploadFile, model_id: str) -> TranscriptionResponse:
        """
        Translate audio to English

        Args:
            file: Uploaded audio file
            model_id: Model to use

        Returns:
            TranscriptionResponse with translated text

        Raises:
            ValueError: If the model is not downloaded.
        """
        model_path = self.registry.get_model_path(model_id)
        if not model_path:
            raise ValueError(f"Model {model_id} not downloaded")

        adapter = await self._get_or_create_adapter(model_id, model_path)
        self.last_used[model_id] = time.time()

        temp_path = await self._save_upload_to_temp(file)
        try:
            result = await adapter.transcribe(
                audio=temp_path,
                task="translate",
            )
            return self._convert_result(result)
        finally:
            self._remove_temp(temp_path)

    async def _save_upload_to_temp(self, file: UploadFile) -> str:
        """Persist an uploaded file to a named temp file and return its path.

        The adapter consumes a filesystem path, so the upload is spooled to
        disk first. The caller is responsible for removal via _remove_temp().
        """
        temp = tempfile.NamedTemporaryFile(delete=False, suffix=".audio")
        temp_path = temp.name
        temp.close()

        try:
            content = await file.read()
            async with aiofiles.open(temp_path, "wb") as f:
                await f.write(content)
        except Exception:
            # Don't leak the temp file if the copy fails.
            self._remove_temp(temp_path)
            raise
        return temp_path

    @staticmethod
    def _remove_temp(temp_path: str | None) -> None:
        """Delete a temp file if it still exists (no-op otherwise)."""
        if temp_path and Path(temp_path).exists():
            Path(temp_path).unlink()

    async def _get_or_create_adapter(self, model_id: str, model_path: Path) -> FasterWhisperAdapter:
        """Get or create adapter for model (loads the model on first use)."""
        if model_id not in self.adapters:
            adapter = FasterWhisperAdapter()
            await adapter.load_model(model_path)
            self.adapters[model_id] = adapter

        return self.adapters[model_id]

    def _convert_result(self, result: TranscriptionResult) -> TranscriptionResponse:
        """Convert core TranscriptionResult to API TranscriptionResponse"""
        segments = None
        if result.segments:
            segments = [
                TranscriptionSegment(
                    id=seg.id,
                    start=seg.start,
                    end=seg.end,
                    text=seg.text,
                    tokens=seg.tokens,
                    temperature=seg.temperature,
                    avg_logprob=seg.avg_logprob,
                    compression_ratio=seg.compression_ratio,
                    no_speech_prob=seg.no_speech_prob,
                )
                for seg in result.segments
            ]

        words = None
        if result.words:
            words = [
                TranscriptionWord(
                    word=w.word,
                    start=w.start,
                    end=w.end,
                    probability=w.probability,
                )
                for w in result.words
            ]

        return TranscriptionResponse(
            text=result.text,
            language=result.language,
            duration=result.duration,
            segments=segments,
            words=words,
        )
@@ -0,0 +1,139 @@
1
+ import asyncio
2
+ import time
3
+ from pathlib import Path
4
+
5
+ from vocal_core import ModelRegistry
6
+ from vocal_core.adapters.tts import SimpleTTSAdapter, TTSAdapter, TTSResult, Voice
7
+
8
+
9
class TTSService:
    """Service for handling TTS operations with Ollama-style keep-alive.

    Adapters are cached per model id and unloaded by a background task once
    idle longer than ``keep_alive_seconds``. The pseudo model id "pyttsx3"
    maps to the system TTS adapter and requires no download.
    """

    def __init__(self, registry: ModelRegistry, keep_alive_seconds: int = 300):
        self.registry = registry
        # Loaded adapters keyed by model_id (including "pyttsx3").
        self.adapters: dict[str, TTSAdapter] = {}
        # model_id -> time.time() of last use, for keep-alive eviction.
        self.last_used: dict[str, float] = {}
        self.keep_alive_seconds = keep_alive_seconds
        self._cleanup_task = None

    async def start_cleanup_task(self):
        """Start background task to cleanup unused models (idempotent)."""
        if self._cleanup_task is None:
            self._cleanup_task = asyncio.create_task(self._cleanup_loop())

    async def _cleanup_loop(self):
        """Background task to unload models after keep_alive expires."""
        while True:
            try:
                await asyncio.sleep(60)
                now = time.time()
                expired = [
                    model_id
                    for model_id, last in self.last_used.items()
                    if now - last > self.keep_alive_seconds
                ]

                for model_id in expired:
                    if model_id in self.adapters:
                        adapter = self.adapters[model_id]
                        # Not every adapter implements unload_model.
                        if hasattr(adapter, "unload_model"):
                            await adapter.unload_model()
                        del self.adapters[model_id]
                        del self.last_used[model_id]

            except Exception:
                # Best-effort: a transient error must not kill the loop.
                pass

    async def synthesize(
        self,
        model_id: str,
        text: str,
        voice: str | None = None,
        speed: float = 1.0,
        output_format: str = "wav",
    ) -> TTSResult:
        """
        Synthesize text to speech using specified model

        Args:
            model_id: TTS model identifier (use "pyttsx3" for system TTS)
            text: Text to convert to speech
            voice: Voice ID to use
            speed: Speech speed multiplier
            output_format: Output audio format

        Returns:
            TTSResult with audio data

        Raises:
            ValueError: If the model is unknown, not a TTS model, or not
                downloaded.
        """
        # "pyttsx3" is a pseudo model id for the local system TTS engine;
        # it bypasses the registry entirely.
        if model_id == "pyttsx3":
            adapter = await self._get_or_create_simple_adapter()
            self.last_used[model_id] = time.time()
            return await adapter.synthesize(text=text, voice=voice, speed=speed, output_format=output_format)

        model_info = await self.registry.get_model(model_id)
        if not model_info:
            raise ValueError(f"Model {model_id} not found in registry")

        if model_info.task.value != "tts":
            raise ValueError(f"Model {model_id} is not a TTS model (task: {model_info.task})")

        model_path = self.registry.get_model_path(model_id)

        if not model_path:
            raise ValueError(f"Model {model_id} not downloaded. Download it first: POST /v1/models/{model_id}/download")

        adapter = await self._get_or_create_adapter(model_id, model_path, model_info.backend.value)
        self.last_used[model_id] = time.time()

        return await adapter.synthesize(text=text, voice=voice, speed=speed, output_format=output_format)

    async def get_voices(self, model_id: str | None = None) -> list[Voice]:
        """
        Get list of available voices

        Args:
            model_id: Optional model ID to get voices for specific model
                (None or "pyttsx3" selects the system TTS engine)

        Returns:
            List of Voice objects

        Raises:
            ValueError: If the model is unknown or not downloaded.
        """
        if not model_id or model_id == "pyttsx3":
            adapter = await self._get_or_create_simple_adapter()
            self.last_used["pyttsx3"] = time.time()
            return await adapter.get_voices()

        model_path = self.registry.get_model_path(model_id)
        if not model_path:
            raise ValueError(f"Model {model_id} not downloaded")

        model_info = await self.registry.get_model(model_id)
        if not model_info:
            raise ValueError(f"Model {model_id} not found")

        adapter = await self._get_or_create_adapter(model_id, model_path, model_info.backend.value)
        self.last_used[model_id] = time.time()
        return await adapter.get_voices()

    async def _get_or_create_simple_adapter(self) -> SimpleTTSAdapter:
        """Get or create the simple system TTS adapter."""
        if "pyttsx3" not in self.adapters:
            adapter = SimpleTTSAdapter()
            # The system engine needs no real model files; "." is a dummy path.
            await adapter.load_model(Path("."))
            self.adapters["pyttsx3"] = adapter
        return self.adapters["pyttsx3"]

    async def _get_or_create_adapter(self, model_id: str, model_path: Path, backend: str) -> TTSAdapter:
        """Get or create TTS adapter for model (loads the model on first use)."""
        if model_id not in self.adapters:
            adapter = self._create_adapter(backend)
            await adapter.load_model(model_path)
            self.adapters[model_id] = adapter

        return self.adapters[model_id]

    def _create_adapter(self, backend: str) -> TTSAdapter:
        """Create appropriate TTS adapter based on backend.

        Every backend currently maps to SimpleTTSAdapter (the original
        if/else had identical branches, so it is collapsed). Extend here
        when backend-specific adapters are introduced.
        """
        return SimpleTTSAdapter()